[tor-commits] [metrics-web/release] Add OnionPerf throughput graph.

karsten at torproject.org karsten at torproject.org
Sat Nov 9 21:45:07 UTC 2019


commit d4452b38183e50d20c29bad6fff51bae58ebde0c
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sun Jun 2 15:56:50 2019 +0200

    Add OnionPerf throughput graph.
    
    We calculate throughput from the time between receiving 0.5 and 1 MiB
    of a response, which obviously excludes any measurements with
    responses smaller than 1 MiB. From the FILESIZE and DATAPERC* fields
    we can compute the number of milliseconds that have elapsed between
    receiving bytes 524,288 and 1,048,576, which is a total of 524,288
    bytes or 4,194,304 bits. We divide the value 4,194,304 by this time
    difference to obtain throughput in bits per millisecond which happens
    to be the same value as the number of kilobits per second.
    
    Implements #29772.
---
 src/main/R/rserver/rserve-init.R                   | 40 +++++++++++++++++++
 .../torproject/metrics/stats/onionperf/Main.java   | 30 +++++++++++++++
 src/main/resources/web.xml                         |  4 ++
 src/main/resources/web/json/categories.json        |  3 +-
 src/main/resources/web/json/metrics.json           | 12 ++++++
 .../resources/web/jsps/reproducible-metrics.jsp    |  9 ++++-
 src/main/resources/web/jsps/stats.jsp              | 26 +++++++++++++
 src/main/sql/onionperf/init-onionperf.sql          | 45 ++++++++++++++++++++++
 src/submods/metrics-lib                            |  2 +-
 9 files changed, 168 insertions(+), 3 deletions(-)

diff --git a/src/main/R/rserver/rserve-init.R b/src/main/R/rserver/rserve-init.R
index 88aa5b9..f74fd03 100644
--- a/src/main/R/rserver/rserve-init.R
+++ b/src/main/R/rserver/rserve-init.R
@@ -697,6 +697,46 @@ plot_onionperf_latencies <- function(start_p, end_p, server_p, path_p) {
   ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
 }
 
+prepare_onionperf_throughput <- function(start_p = NULL, end_p = NULL,
+    server_p = NULL) {
+  read_csv(file = paste(stats_dir, "onionperf-throughput.csv", sep = ""),
+      col_types = cols(
+        date = col_date(format = ""),
+        source = col_character(),
+        server = col_character(),
+        low = col_double(),
+        q1 = col_double(),
+        md = col_double(),
+        q3 = col_double(),
+        high = col_double())) %>%
+    filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
+    filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
+    filter(if (!is.null(server_p)) server == server_p else TRUE)
+}
+
+plot_onionperf_throughput <- function(start_p, end_p, server_p, path_p) {
+  prepare_onionperf_throughput(start_p, end_p, server_p) %>%
+    complete(date = full_seq(date, period = 1), nesting(source)) %>%
+    ggplot(aes(x = date, ymin = q1 / 1000, ymax = q3 / 1000, fill = source)) +
+    geom_ribbon(alpha = 0.5) +
+    geom_line(aes(y = md / 1000, colour = source), size = 0.75) +
+    geom_line(aes(y = high / 1000, colour = source), size = 0.375) +
+    geom_line(aes(y = low / 1000, colour = source), size = 0.375) +
+    scale_x_date(name = "", breaks = custom_breaks,
+      labels = custom_labels, minor_breaks = custom_minor_breaks) +
+    scale_y_continuous(name = "", labels = unit_format(unit = "Mbps"),
+      limits = c(0, NA)) +
+    scale_fill_hue(name = "Source") +
+    scale_colour_hue(name = "Source") +
+    facet_grid(source ~ ., scales = "free", space = "free") +
+    ggtitle(paste("Throughput when downloading from", server_p, "server")) +
+    labs(caption = copyright_notice) +
+    theme(legend.position = "none",
+          strip.text.y = element_text(angle = 0, hjust = 0),
+          strip.background = element_rect(fill = NA))
+  ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
+}
+
 prepare_connbidirect <- function(start_p = NULL, end_p = NULL) {
   read_csv(file = paste(stats_dir, "connbidirect2.csv", sep = ""),
       col_types = cols(
diff --git a/src/main/java/org/torproject/metrics/stats/onionperf/Main.java b/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
index a75cd1b..8fb762d 100644
--- a/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
@@ -55,6 +55,9 @@ public class Main {
         queryBuildTimes(connection));
     writeStatistics(new File(baseDir, "stats/latencies.csv").toPath(),
         queryLatencies(connection));
+    writeStatistics(
+        new File(baseDir, "stats/onionperf-throughput.csv").toPath(),
+        queryThroughput(connection));
     disconnectFromDatabase(connection);
     log.info("Terminated onionperf module.");
   }
@@ -321,6 +324,33 @@ public class Main {
     return statistics;
   }
 
+  static List<String> queryThroughput(Connection connection)
+      throws SQLException {
+    log.info("Querying throughput statistics from database.");
+    List<String> statistics = new ArrayList<>();
+    statistics.add("date,source,server,low,q1,md,q3,high");
+    Statement st = connection.createStatement();
+    String queryString = "SELECT date, source, server, low, q1, md, q3, high "
+        + "FROM throughput_stats";
+    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.US);
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+    try (ResultSet rs = st.executeQuery(queryString)) {
+      while (rs.next()) {
+        statistics.add(String.format("%s,%s,%s,%d,%d,%d,%d,%d",
+            dateFormat.format(rs.getDate("date", calendar)),
+            getStringFromResultSet(rs, "source"),
+            rs.getString("server"),
+            rs.getInt("low"),
+            rs.getInt("q1"),
+            rs.getInt("md"),
+            rs.getInt("q3"),
+            rs.getInt("high")));
+      }
+    }
+    return statistics;
+  }
+
   /** Retrieves the <code>String</code> value of the designated column in the
    * current row of the given <code>ResultSet</code> object, or returns the
    * empty string if the retrieved value was <code>NULL</code>. */
diff --git a/src/main/resources/web.xml b/src/main/resources/web.xml
index b643b89..045dd2e 100644
--- a/src/main/resources/web.xml
+++ b/src/main/resources/web.xml
@@ -43,6 +43,7 @@
     <url-pattern>/torperf-failures.html</url-pattern>
     <url-pattern>/onionperf-buildtimes.html</url-pattern>
     <url-pattern>/onionperf-latencies.html</url-pattern>
+    <url-pattern>/onionperf-throughput.html</url-pattern>
     <url-pattern>/connbidirect.html</url-pattern>
     <url-pattern>/hidserv-dir-onions-seen.html</url-pattern>
     <url-pattern>/hidserv-rend-relayed-cells.html</url-pattern>
@@ -135,6 +136,9 @@
     <url-pattern>/onionperf-latencies.png</url-pattern>
     <url-pattern>/onionperf-latencies.pdf</url-pattern>
     <url-pattern>/onionperf-latencies.csv</url-pattern>
+    <url-pattern>/onionperf-throughput.png</url-pattern>
+    <url-pattern>/onionperf-throughput.pdf</url-pattern>
+    <url-pattern>/onionperf-throughput.csv</url-pattern>
     <url-pattern>/connbidirect.png</url-pattern>
     <url-pattern>/connbidirect.pdf</url-pattern>
     <url-pattern>/connbidirect.csv</url-pattern>
diff --git a/src/main/resources/web/json/categories.json b/src/main/resources/web/json/categories.json
index d19aeca..ad0df11 100644
--- a/src/main/resources/web/json/categories.json
+++ b/src/main/resources/web/json/categories.json
@@ -62,7 +62,8 @@
       "torperf",
       "torperf-failures",
       "onionperf-buildtimes",
-      "onionperf-latencies"
+      "onionperf-latencies",
+      "onionperf-throughput"
     ]
   },
   {
diff --git a/src/main/resources/web/json/metrics.json b/src/main/resources/web/json/metrics.json
index 006de8b..bfcde22 100644
--- a/src/main/resources/web/json/metrics.json
+++ b/src/main/resources/web/json/metrics.json
@@ -309,6 +309,18 @@
     ]
   },
   {
+    "id": "onionperf-throughput",
+    "title": "Throughput",
+    "type": "Graph",
+    "description": "<p>This graph shows throughput when downloading static files of different sizes over Tor, either from a server on the public internet or from a version 2 onion server. Throughput is calculated from the time between receiving 0.5 and 1 MiB of the response. The graph shows the median of measurements as thick line, the range of measurements from first to third quartile as ribbon, and the highest and lowest non-outlier measurements as thin lines.</p>",
+    "function": "onionperf_throughput",
+    "parameters": [
+      "start",
+      "end",
+      "server"
+    ]
+  },
+  {
     "id": "connbidirect",
     "title": "Fraction of connections used uni-/bidirectionally",
     "type": "Graph",
diff --git a/src/main/resources/web/jsps/reproducible-metrics.jsp b/src/main/resources/web/jsps/reproducible-metrics.jsp
index a833b31..aee4d5c 100644
--- a/src/main/resources/web/jsps/reproducible-metrics.jsp
+++ b/src/main/resources/web/jsps/reproducible-metrics.jsp
@@ -619,6 +619,7 @@ Here we explain how we evaluate Torperf/OnionPerf measurement to obtain the same
 <li>Timeouts and failures of downloading files over Tor <a href="/torperf-failures.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
 <li>Circuit build times <a href="/onionperf-buildtimes.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
 <li>Circuit round-trip latencies <a href="/onionperf-latencies.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
+<li>Throughput <a href="/onionperf-throughput.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a></li>
 </ul>
 
 <h4>Step 1: Parse OnionPerf and/or Torperf measurement results</h4>
@@ -636,17 +637,23 @@ Here we explain how we evaluate Torperf/OnionPerf measurement to obtain the same
 <li><code>DATACOMPLETE</code>: Download end time that is only set if the request succeeded.</li>
 <li><code>READBYTES</code>: Total number of bytes read, which indicates whether this request succeeded (if ≥ <code>FILESIZE</code>) or failed.</li>
 <li><code>DIDTIMEOUT</code>: 1 if the request timed out, 0 otherwise.</li>
+<li><code>DATAPERCx</code>: Time when x% of expected bytes were read for x = { 10, 20, 50, 100 }.</li>
 <li><code>BUILDTIMES</code>: Comma-separated list of times when circuit hops were built, which includes all circuits used for making measurement requests, successful or not.</li>
 <li><code>ENDPOINTREMOTE</code>: Hostname, IP address, and port that was used to connect to the remote server; we use this to distinguish a request to a public server (if <code>ENDPOINTREMOTE</code> is not present or does not contain <code>".onion"</code> as substring) or to an onion server.</li>
 </ul>
 
 <h4>Step 2: Aggregate measurement results</h4>
 
-<p>Each of the measurement results parsed in the previous steps constitutes a single measurement.
+<p>Each of the measurement results parsed in the previous step constitutes a single measurement.
 We're first interested in statistics on download times for the <a href="/torperf.html">Time to download files over Tor</a> graph.
 Therefore we consider only measurements with <code>DATACOMPLETE > START</code>, for which we calculate the download time as: <code>DATACOMPLETE - START</code>.
 We then compute the 25th, 50th, and 75th percentile of download times by sorting download times, determining the percentile rank, and using linear interpolation between adjacent ranks.</p>
 
+<p>Next we're interested in the average throughput of measurements for the <a href="/onionperf-throughput.html">Throughput</a> graph.
+We calculate throughput from the time between receiving 0.5 and 1 MiB of a response, which obviously excludes any measurements with responses smaller than 1 MiB.
+From <code>DATAPERC50</code> and <code>DATAPERC100</code> (if <code>FILESIZE = 1048576</code>) or <code>DATAPERC10</code> and <code>DATAPERC20</code> (if <code>FILESIZE = 5242880</code>) we can compute the number of milliseconds that have elapsed between receiving bytes 524,288 and 1,048,576, which is a total of 524,288 bytes or 4,194,304 bits.
+We divide the value 4,194,304 by this time difference to obtain throughput in bits per millisecond which happens to be the same value as the number of kilobits per second.</p>
+
 <p>We're also interested in circuit round-trip latencies for the <a href="/onionperf-latencies.html">Circuit round-trip latencies</a> graph.
 We measure circuit latency as the time between sending the HTTP request and receiving the HTTP response header.
 We calculate latencies as <code>DATARESPONSE - DATAREQUEST</code> for measurements with non-zero values for both timestamps.
diff --git a/src/main/resources/web/jsps/stats.jsp b/src/main/resources/web/jsps/stats.jsp
index 443c292..68c4114 100644
--- a/src/main/resources/web/jsps/stats.jsp
+++ b/src/main/resources/web/jsps/stats.jsp
@@ -570,6 +570,32 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
 <li><b>high:</b> Highest latency within 1.5 IQR of upper quartile (upper whisker in a boxplot) of time in milliseconds between sending the HTTP request and receiving the HTTP response header.</li>
 </ul>
 
+<h3>Throughput
+<a href="/onionperf-throughput.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
+<a href="/onionperf-throughput.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
+<a href="#onionperf-throughput" name="onionperf-throughput" class="anchor">#</a></h3>
+
+<h4>Parameters</h4>
+
+<ul>
+<li><b>start:</b> First UTC date (YYYY-MM-DD) to include in the file.</li>
+<li><b>end:</b> Last UTC date (YYYY-MM-DD) to include in the file.</li>
+<li><b>server:</b> Either <b>"public"</b> for requests to a server on the public internet, or <b>"onion"</b> for requests to a version 2 onion server.</li>
+</ul>
+
+<h4>Columns</h4>
+
+<ul>
+<li><b>date:</b> UTC date (YYYY-MM-DD) when download performance was measured.</li>
+<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements.</li>
+<li><b>server:</b> Either <b>"public"</b> if the request was made to a server on the public internet, or <b>"onion"</b> if the request was made to a version 2 onion server.</li>
+<li><b>low:</b> Lowest measured throughput within 1.5 IQR of lower quartile (lower whisker in a boxplot) in kilobits per second.</li>
+<li><b>q1:</b> First quartile of measured throughput in kilobits per second.</li>
+<li><b>md:</b> Median of measured throughput in kilobits per second.</li>
+<li><b>q3:</b> Third quartile of measured throughput in kilobits per second.</li>
+<li><b>high:</b> Highest measured throughput within 1.5 IQR of upper quartile (upper whisker in a boxplot) in kilobits per second.</li>
+</ul>
+
 </div>
 
 <div class="container">
diff --git a/src/main/sql/onionperf/init-onionperf.sql b/src/main/sql/onionperf/init-onionperf.sql
index 536cd45..b7c41d6 100644
--- a/src/main/sql/onionperf/init-onionperf.sql
+++ b/src/main/sql/onionperf/init-onionperf.sql
@@ -164,3 +164,48 @@ FROM filtered_measurements NATURAL JOIN quartiles
 GROUP BY 1, 2, 3
 ORDER BY date, source, server;
 
+-- Explanation of the number 4194304 below for computing kbps: From the FILESIZE
+-- and DATAPERC* fields we can compute the number of milliseconds that have
+-- elapsed between receiving bytes 524,288 and 1,048,576, which is a total
+-- amount of 524,288 bytes or 4,194,304 bits. If we divide that number of
+-- milliseconds by 4,194,304 we obtain the number of milliseconds that have
+-- elapsed for downloading 1 bit, which happens to be the same value as the
+-- number of seconds for downloading 1 kilobit. We want the reciprocal of that
+-- value which has the unit kilobits per second.
+CREATE OR REPLACE VIEW throughput_stats AS
+WITH filtered_measurements AS (
+  SELECT DATE(start) AS date,
+    source,
+    CASE WHEN endpointremote LIKE '%.onion:%' THEN 'onion'
+      ELSE 'public' END AS server,
+    CASE WHEN filesize = 1048576 AND dataperc100 > dataperc50
+      THEN 4194304 / (dataperc100 - dataperc50)
+      WHEN filesize = 5242880 AND dataperc20 > dataperc10
+      THEN 4194304 / (dataperc20 - dataperc10)
+      ELSE NULL END AS kbps
+  FROM measurements
+  WHERE DATE(start) < current_date - 1
+  AND endpointremote NOT SIMILAR TO '_{56}.onion%'
+), quartiles AS (
+  SELECT date,
+    source,
+    server,
+    PERCENTILE_CONT(ARRAY[0.25,0.5,0.75])
+      WITHIN GROUP(ORDER BY kbps) AS q
+  FROM filtered_measurements
+  GROUP BY date, source, server
+)
+SELECT date,
+  source,
+  server,
+  MIN(CASE WHEN kbps >= q[1] - ((q[3] - q[1]) * 1.5)
+    THEN kbps ELSE NULL END) AS low,
+  TRUNC(AVG(q[1])) AS q1,
+  TRUNC(AVG(q[2])) AS md,
+  TRUNC(AVG(q[3])) AS q3,
+  MAX(CASE WHEN kbps <= q[3] + ((q[3] - q[1]) * 1.5)
+    THEN kbps ELSE NULL END) AS high
+FROM filtered_measurements NATURAL JOIN quartiles
+GROUP BY date, source, server
+ORDER BY date, source, server;
+
diff --git a/src/submods/metrics-lib b/src/submods/metrics-lib
index 3693e10..e723c06 160000
--- a/src/submods/metrics-lib
+++ b/src/submods/metrics-lib
@@ -1 +1 @@
-Subproject commit 3693e107a3aff7473200ece3ba3889dc9462c7b3
+Subproject commit e723c065b764ecfbb3bb96d4c491e67398b7f21b





More information about the tor-commits mailing list