[tor-commits] [metrics-web/release] Extend latency graph to contain high/low values.

karsten at torproject.org karsten at torproject.org
Sat Nov 9 21:45:07 UTC 2019


commit fd251d6c69efeb3d548f915cc40e001e89c1f41f
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed May 29 16:10:40 2019 +0200

    Extend latency graph to contain high/low values.
    
    This patch adds two new lines to the existing circuit round-trip
    latencies graph: lowest and highest measurements that are not
    outliers.
    
    Implements #29773.
---
 src/main/R/rserver/rserve-init.R                   | 15 ++++--
 .../torproject/metrics/stats/onionperf/Main.java   | 10 ++--
 src/main/resources/web/json/metrics.json           |  2 +-
 .../resources/web/jsps/reproducible-metrics.jsp    |  3 +-
 src/main/resources/web/jsps/stats.jsp              |  2 +
 src/main/sql/onionperf/init-onionperf.sql          | 58 +++++++++++-----------
 6 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/src/main/R/rserver/rserve-init.R b/src/main/R/rserver/rserve-init.R
index 3d6dad7..88aa5b9 100644
--- a/src/main/R/rserver/rserve-init.R
+++ b/src/main/R/rserver/rserve-init.R
@@ -663,9 +663,11 @@ prepare_onionperf_latencies <- function(start_p = NULL, end_p = NULL,
         date = col_date(format = ""),
         source = col_character(),
         server = col_character(),
+        low = col_double(),
         q1 = col_double(),
         md = col_double(),
-        q3 = col_double())) %>%
+        q3 = col_double(),
+        high = col_double())) %>%
     filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
     filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
     filter(if (!is.null(server_p)) server == server_p else TRUE)
@@ -675,18 +677,23 @@ plot_onionperf_latencies <- function(start_p, end_p, server_p, path_p) {
   prepare_onionperf_latencies(start_p, end_p, server_p) %>%
     filter(source != "") %>%
     complete(date = full_seq(date, period = 1), nesting(source)) %>%
-    ggplot(aes(x = date, y = md, ymin = q1, ymax = q3, fill = source)) +
+    ggplot(aes(x = date, ymin = q1, ymax = q3, fill = source)) +
     geom_ribbon(alpha = 0.5) +
-    geom_line(aes(colour = source), size = 0.75) +
+    geom_line(aes(y = md, colour = source), size = 0.75) +
+    geom_line(aes(y = high, colour = source), size = 0.375) +
+    geom_line(aes(y = low, colour = source), size = 0.375) +
     scale_x_date(name = "", breaks = custom_breaks,
       labels = custom_labels, minor_breaks = custom_minor_breaks) +
     scale_y_continuous(name = "", labels = unit_format(unit = "ms"),
       limits = c(0, NA)) +
     scale_fill_hue(name = "Source") +
     scale_colour_hue(name = "Source") +
+    facet_grid(source ~ ., scales = "free", space = "free") +
     ggtitle(paste("Circuit round-trip latencies to", server_p, "server")) +
     labs(caption = copyright_notice) +
-    theme(legend.position = "top")
+    theme(legend.position = "none",
+          strip.text.y = element_text(angle = 0, hjust = 0),
+          strip.background = element_rect(fill = NA))
   ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
 }
 
diff --git a/src/main/java/org/torproject/metrics/stats/onionperf/Main.java b/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
index a970434..a75cd1b 100644
--- a/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
@@ -298,22 +298,24 @@ public class Main {
       throws SQLException {
     log.info("Querying latency statistics from database.");
     List<String> statistics = new ArrayList<>();
-    statistics.add("date,source,server,q1,md,q3");
+    statistics.add("date,source,server,low,q1,md,q3,high");
     Statement st = connection.createStatement();
-    String queryString = "SELECT date, source, server, q1, md, q3 "
+    String queryString = "SELECT date, source, server, low, q1, md, q3, high "
         + "FROM latencies_stats";
     DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.US);
     dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
     try (ResultSet rs = st.executeQuery(queryString)) {
       while (rs.next()) {
-        statistics.add(String.format("%s,%s,%s,%d,%d,%d",
+        statistics.add(String.format("%s,%s,%s,%d,%d,%d,%d,%d",
             dateFormat.format(rs.getDate("date", calendar)),
             getStringFromResultSet(rs, "source"),
             rs.getString("server"),
+            rs.getInt("low"),
             rs.getInt("q1"),
             rs.getInt("md"),
-            rs.getInt("q3")));
+            rs.getInt("q3"),
+            rs.getInt("high")));
       }
     }
     return statistics;
diff --git a/src/main/resources/web/json/metrics.json b/src/main/resources/web/json/metrics.json
index 5c99546..006de8b 100644
--- a/src/main/resources/web/json/metrics.json
+++ b/src/main/resources/web/json/metrics.json
@@ -300,7 +300,7 @@
     "id": "onionperf-latencies",
     "title": "Circuit round-trip latencies",
     "type": "Graph",
-    "description": "<p>This graph shows round-trip latencies of circuits used for downloading static files of different sizes over Tor, either from a server on the public internet or from a version 2 onion server. Round-trip latencies are measured as the time between sending the HTTP request and receiving the HTTP response header. The graph shows the range of measurements from first to third quartile, and highlights the median. The slowest and fastest quarter of measurements are omitted from the graph.</p>",
+    "description": "<p>This graph shows round-trip latencies of circuits used for downloading static files of different sizes over Tor, either from a server on the public internet or from a version 2 onion server. Round-trip latencies are measured as the time between sending the HTTP request and receiving the HTTP response header. The graph shows the median of measurements as thick line, the range of measurements from first to third quartile as ribbon, and the highest and lowest non-outlier measurements as thin lines.</p>",
     "function": "onionperf_latencies",
     "parameters": [
       "start",
diff --git a/src/main/resources/web/jsps/reproducible-metrics.jsp b/src/main/resources/web/jsps/reproducible-metrics.jsp
index 3838972..a833b31 100644
--- a/src/main/resources/web/jsps/reproducible-metrics.jsp
+++ b/src/main/resources/web/jsps/reproducible-metrics.jsp
@@ -650,7 +650,8 @@ We then compute the 25th, 50th, and 75th percentile of download times by sorting
 <p>We're also interested in circuit round-trip latencies for the <a href="/onionperf-latencies.html">Circuit round-trip latencies</a> graph.
 We measure circuit latency as the time between sending the HTTP request and receiving the HTTP response header.
 We calculate latencies as <code>DATARESPONSE - DATAREQUEST</code> for measurements with non-zero values for both timestamps.
-We then compute 25th, 50th, and 75th percentiles in the same way as for download times above.</p>
+We then compute 25th, 50th, and 75th percentiles in the same way as for download times above.
+We also compute the lowest latency within 1.5 IQR of the lower quartile and the highest latency within 1.5 IQR of the upper quartile.</p>
 
 <p>Ideally, all measurements would succeed.
 But it's also possible that some measurements did not complete within a pre-defined timeout or failed for some other reason.
diff --git a/src/main/resources/web/jsps/stats.jsp b/src/main/resources/web/jsps/stats.jsp
index 6c1955a..443c292 100644
--- a/src/main/resources/web/jsps/stats.jsp
+++ b/src/main/resources/web/jsps/stats.jsp
@@ -563,9 +563,11 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when download performance was measured.</li>
 <li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements.</li>
 <li><b>server:</b> Either <b>"public"</b> if the request was made to a server on the public internet, or <b>"onion"</b> if the request was made to a version 2 onion server.</li>
+<li><b>low:</b> Lowest latency within 1.5 IQR of lower quartile (lower whisker in a boxplot) of time in milliseconds between sending the HTTP request and receiving the HTTP response header.</li>
 <li><b>q1:</b> First quartile of time in milliseconds between sending the HTTP request and receiving the HTTP response header.</li>
 <li><b>md:</b> Median of time in milliseconds between sending the HTTP request and receiving the HTTP response header.</li>
 <li><b>q3:</b> Third quartile of time in milliseconds between sending the HTTP request and receiving the HTTP response header.</li>
+<li><b>high:</b> Highest latency within 1.5 IQR of upper quartile (upper whisker in a boxplot) of time in milliseconds between sending the HTTP request and receiving the HTTP response header.</li>
 </ul>
 
 </div>
diff --git a/src/main/sql/onionperf/init-onionperf.sql b/src/main/sql/onionperf/init-onionperf.sql
index 7c0176e..536cd45 100644
--- a/src/main/sql/onionperf/init-onionperf.sql
+++ b/src/main/sql/onionperf/init-onionperf.sql
@@ -130,37 +130,37 @@ GROUP BY date, 2, position) sub
 ORDER BY date, source, position;
 
 CREATE OR REPLACE VIEW latencies_stats AS
+WITH filtered_measurements AS (
+  SELECT DATE(start) AS date,
+    source,
+    CASE WHEN endpointremote LIKE '%.onion:%' THEN 'onion'
+      ELSE 'public' END AS server,
+    dataresponse - datarequest AS latency
+  FROM measurements
+  WHERE DATE(start) < current_date - 1
+  AND datarequest > 0
+  AND dataresponse > 0
+  AND endpointremote NOT SIMILAR TO '_{56}.onion%'
+), quartiles AS (
+  SELECT date,
+    source,
+    server,
+    PERCENTILE_CONT(ARRAY[0.25,0.5,0.75])
+      WITHIN GROUP(ORDER BY latency) AS q
+  FROM filtered_measurements
+  GROUP BY date, source, server
+)
 SELECT date,
   source,
   server,
-  TRUNC(q[1]) AS q1,
-  TRUNC(q[2]) AS md,
-  TRUNC(q[3]) AS q3
-FROM (
-SELECT DATE(start) AS date,
-  source,
-  CASE WHEN endpointremote LIKE '%.onion:%' THEN 'onion'
-    ELSE 'public' END AS server,
-  PERCENTILE_CONT(ARRAY[0.25,0.5,0.75])
-  WITHIN GROUP(ORDER BY dataresponse - datarequest) AS q
-FROM measurements
-WHERE DATE(start) < current_date - 1
-AND datarequest > 0
-AND dataresponse > 0
-AND endpointremote NOT SIMILAR TO '_{56}.onion%'
-GROUP BY date, source, server
-UNION
-SELECT DATE(start) AS date,
-  '' AS source,
-  CASE WHEN endpointremote LIKE '%.onion:%' THEN 'onion'
-    ELSE 'public' END AS server,
-  PERCENTILE_CONT(ARRAY[0.25,0.5,0.75])
-  WITHIN GROUP(ORDER BY dataresponse - datarequest) AS q
-FROM measurements
-WHERE DATE(start) < current_date - 1
-AND datarequest > 0
-AND dataresponse > 0
-AND endpointremote NOT SIMILAR TO '_{56}.onion%'
-GROUP BY date, 2, server) sub
+  MIN(CASE WHEN latency >= q[1] - ((q[3] - q[1]) * 1.5)
+    THEN latency ELSE NULL END) AS low,
+  TRUNC(AVG(q[1])) AS q1,
+  TRUNC(AVG(q[2])) AS md,
+  TRUNC(AVG(q[3])) AS q3,
+  MAX(CASE WHEN latency <= q[3] + ((q[3] - q[1]) * 1.5)
+    THEN latency ELSE NULL END) AS high
+FROM filtered_measurements NATURAL JOIN quartiles
+GROUP BY 1, 2, 3
 ORDER BY date, source, server;
 





More information about the tor-commits mailing list