[tor-commits] [metrics-web/master] Include partials in download performance graphs.

karsten at torproject.org karsten at torproject.org
Mon May 25 14:21:12 UTC 2020


commit 110cb010bd29b24902b62273b96566bf086da270
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sun May 24 08:43:10 2020 +0200

    Include partials in download performance graphs.
    
    As part of #34023, we're going to give up on making 50 KiB and 1 MiB
    downloads in the near future. In order to continue plotting time to
    download these file sizes we're including partial downloads of larger
    file sizes.
    
    Implements #30611.
---
 src/main/R/rserver/rserve-init.R                   |  8 ++-
 .../torproject/metrics/stats/onionperf/Main.java   | 47 +++++++++++++++++-
 src/main/resources/web/json/metrics.json           |  3 +-
 .../resources/web/jsps/reproducible-metrics.jsp    |  5 +-
 src/main/resources/web/jsps/stats.jsp              |  9 ++--
 src/main/sql/onionperf/init-onionperf.sql          | 57 ++++++++++++++++++++++
 6 files changed, 115 insertions(+), 14 deletions(-)

diff --git a/src/main/R/rserver/rserve-init.R b/src/main/R/rserver/rserve-init.R
index e1074ee..3ad89ce 100644
--- a/src/main/R/rserver/rserve-init.R
+++ b/src/main/R/rserver/rserve-init.R
@@ -567,7 +567,8 @@ plot_relayflags <- function(start_p, end_p, flag_p, path_p) {
 
 prepare_torperf <- function(start_p = NULL, end_p = NULL, server_p = NULL,
     filesize_p = NULL) {
-  read_csv(file = paste(stats_dir, "torperf-1.1.csv", sep = ""),
+  read_csv(file = paste(stats_dir, "onionperf-including-partials.csv",
+        sep = ""),
       col_types = cols(
         date = col_date(format = ""),
         filesize = col_double(),
@@ -575,10 +576,7 @@ prepare_torperf <- function(start_p = NULL, end_p = NULL, server_p = NULL,
         server = col_character(),
         q1 = col_double(),
         md = col_double(),
-        q3 = col_double(),
-        timeouts = col_skip(),
-        failures = col_skip(),
-        requests = col_skip())) %>%
+        q3 = col_double())) %>%
     filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
     filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
     filter(if (!is.null(server_p)) server == server_p else TRUE) %>%
diff --git a/src/main/java/org/torproject/metrics/stats/onionperf/Main.java b/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
index 56a689b..02fa08b 100644
--- a/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/onionperf/Main.java
@@ -46,6 +46,9 @@ public class Main {
     logger.info("Starting onionperf module.");
     Connection connection = connectToDatabase();
     importOnionPerfFiles(connection);
+    writeStatistics(
+        new File(baseDir, "stats/onionperf-including-partials.csv").toPath(),
+        queryOnionperfIncludingPartials(connection));
     writeStatistics(new File(baseDir, "stats/torperf-1.1.csv").toPath(),
         queryOnionPerf(connection));
     writeStatistics(new File(baseDir, "stats/buildtimes.csv").toPath(),
@@ -83,7 +86,10 @@ public class Main {
         + "dataperc60, dataperc70, dataperc80, dataperc90, dataperc100, "
         + "launch, used_at, timeout, quantile, circ_id, used_by, "
         + "endpointlocal, endpointproxy, endpointremote, hostnamelocal, "
-        + "hostnameremote, sourceaddress) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, "
+        + "hostnameremote, sourceaddress, partial10240, partial20480, "
+        + "partial51200, partial102400, partial204800, partial512000, "
+        + "partial1048576, partial2097152, partial5242880) "
+        + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
         + "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
         + "?, ?, ?, ?, ?, ?)", Statement.RETURN_GENERATED_KEYS);
 
@@ -195,6 +201,18 @@ public class Main {
                 truncateString(onionPerfStrings[j], 64));
           }
         }
+        int[] partialBytes = new int[] { 10240, 20480, 51200, 102400, 204800,
+            512000, 1048576, 2097152, 5242880 };
+        for (int i = 38, j = 0; j < partialBytes.length; i++, j++) {
+          if (null == tr.getPartials()
+              || !tr.getPartials().containsKey(partialBytes[j])) {
+            psMeasurementsInsert.setNull(i, Types.INTEGER);
+          } else {
+            psMeasurementsInsert.setInt(i,
+                (int) (tr.getPartials().get(partialBytes[j])
+                - tr.getStartMillis()));
+          }
+        }
         psMeasurementsInsert.execute();
         try (ResultSet rs = psMeasurementsInsert.getGeneratedKeys()) {
           if (rs.next()) {
@@ -238,9 +256,34 @@ public class Main {
     return originalString;
   }
 
+  static List<String> queryOnionperfIncludingPartials(Connection connection)
+      throws SQLException {
+    logger.info("Querying statistics including partials from database.");
+    List<String> statistics = new ArrayList<>();
+    statistics
+        .add("date,filesize,source,server,q1,md,q3");
+    Statement st = connection.createStatement();
+    String queryString = "SELECT date, filesize, source, server, q1, md, q3 "
+        + " FROM onionperf_including_partials";
+    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+    try (ResultSet rs = st.executeQuery(queryString)) {
+      while (rs.next()) {
+        statistics.add(String.format("%s,%d,%s,%s,%.0f,%.0f,%.0f",
+            dateFormat.format(rs.getDate("date")),
+            rs.getInt("filesize"),
+            getStringFromResultSet(rs, "source"),
+            getStringFromResultSet(rs, "server"),
+            getDoubleFromResultSet(rs, "q1"),
+            getDoubleFromResultSet(rs, "md"),
+            getDoubleFromResultSet(rs, "q3")));
+      }
+    }
+    return statistics;
+  }
+
   static List<String> queryOnionPerf(Connection connection)
       throws SQLException {
-    logger.info("Querying statistics from database.");
+    logger.info("Querying timeout and failure statistics from database.");
     List<String> statistics = new ArrayList<>();
     statistics
         .add("date,filesize,source,server,q1,md,q3,timeouts,failures,requests");
diff --git a/src/main/resources/web/json/metrics.json b/src/main/resources/web/json/metrics.json
index 5d4a6c7..f2b36a5 100644
--- a/src/main/resources/web/json/metrics.json
+++ b/src/main/resources/web/json/metrics.json
@@ -274,8 +274,7 @@
     "id": "torperf",
     "title": "Time to download files over Tor",
     "type": "Graph",
-    "description": "<p>This graph shows overall performance when downloading static files of different sizes over Tor, either from a server on the public internet or from an onion server.  The graph shows the range of measurements from first to third quartile, and highlights the median.  The slowest and fastest quarter of measurements are omitted from the graph.</p>",
-    "function": "torperf",
+    "description": "<p>This graph shows overall performance when downloading static files of different sizes over Tor, either from a server on the public internet or from an onion server.  Download times include complete downloads of the shown file size as well as partial downloads of larger file sizes.  The graph shows the range of measurements from first to third quartile, and highlights the median.  The slowest and fastest quarter of measurements are omitted from the graph.</p>",    "function": "torperf",
     "parameters": [
       "start",
       "end",
diff --git a/src/main/resources/web/jsps/reproducible-metrics.jsp b/src/main/resources/web/jsps/reproducible-metrics.jsp
index 922e458..0cdd873 100644
--- a/src/main/resources/web/jsps/reproducible-metrics.jsp
+++ b/src/main/resources/web/jsps/reproducible-metrics.jsp
@@ -694,6 +694,7 @@ Here we explain how we evaluate Torperf/OnionPerf measurement to obtain the same
 <li><code>DATACOMPLETE</code>: Download end time that is only set if the request succeeded.</li>
 <li><code>READBYTES</code>: Total number of bytes read, which indicates whether this request succeeded (if ≥ <code>FILESIZE</code>) or failed.</li>
 <li><code>DIDTIMEOUT</code>: 1 if the request timed out, 0 otherwise.</li>
+<li><code>PARTIAL51200</code> and <code>PARTIAL1048576</code>: Time when 51200 or 1048576 bytes were read.</li>
 <li><code>DATAPERCx</code>: Time when x% of expected bytes were read for x = { 10, 20, 50, 100 }.</li>
 <li><code>BUILDTIMES</code>: Comma-separated list of times when circuit hops were built, which includes all circuits used for making measurement requests, successful or not.</li>
 <li><code>ENDPOINTREMOTE</code>: Hostname, IP address, and port that was used to connect to the remote server; we use this to distinguish a request to a public server (if <code>ENDPOINTREMOTE</code> is not present or does not contain <code>".onion"</code> as substring) or to an onion server.</li>
@@ -703,7 +704,9 @@ Here we explain how we evaluate Torperf/OnionPerf measurement to obtain the same
 
 <p>Each of the measurement results parsed in the previous step constitutes a single measurement.
 We're first interested in statistics on download times for the <a href="/torperf.html">Time to download files over Tor</a> graph.
-Therefore we consider only measurements with <code>DATACOMPLETE > START</code>, for which we calculate the download time as: <code>DATACOMPLETE - START</code>.
+Therefore we consider complete downloads as well as partial downloads.
+For complete downloads we calculate the download time as <code>DATACOMPLETE - START</code> for measurements with <code>DATACOMPLETE > START</code>.
+For partial downloads of larger file sizes we calculate the download time as <code>PARTIAL51200 - START</code> for measurements with <code>PARTIAL51200 > START</code> and <code>FILESIZE > 51200</code>; and <code>PARTIAL1048576 - START</code> for measurements with <code>PARTIAL1048576 > START</code> and <code>FILESIZE > 1048576</code>.
 We then compute the 25th, 50th, and 75th percentile of download times by sorting download times, determining the percentile rank, and using linear interpolation between adjacent ranks.</p>
 
 <p>Next we're interested in the average throughput of measurements for the <a href="/onionperf-throughput.html">Throughput</a> graph.
diff --git a/src/main/resources/web/jsps/stats.jsp b/src/main/resources/web/jsps/stats.jsp
index 5abbee0..ad6c566 100644
--- a/src/main/resources/web/jsps/stats.jsp
+++ b/src/main/resources/web/jsps/stats.jsp
@@ -56,6 +56,7 @@ https://metrics.torproject.org/identifier.csv
 <li><b>October 2, 2019:</b> Added <a href="#webstats-tb-channel">Tor Browser updates by release channel</a> graph.</li>
 <li><b>December 18, 2019:</b> Added <a href="#bridgedb-transport">BridgeDB requests by requested transport</a> and <a href="#bridgedb-distributor">BridgeDB requests by distributor</a> graphs.</li>
 <li><b>May 24, 2020:</b> Included version 3 onion service measurements in <a href="#torperf">Time to download files over Tor</a>, <a href="#torperf-failures">Timeouts and failures of downloading files over Tor</a>, <a href="#onionperf-buildtimes">Circuit build times</a>, <a href="#onionperf-latencies">Circuit round-trip latencies</a> graphs.</li>
+<li><b>May 24, 2020:</b> Included partial downloads of larger file sizes in <a href="#torperf">Time to download files over Tor</a> graph.</li>
 </ul>
 
 </div>
@@ -549,7 +550,7 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
 <li><b>start:</b> First UTC date (YYYY-MM-DD) to include in the file.</li>
 <li><b>end:</b> Last UTC date (YYYY-MM-DD) to include in the file.</li>
 <li><b>server:</b> Either <b>"public"</b> for requests to a server on the public internet, or <b>"onion"</b> for requests to an onion server.</li>
-<li><b>filesize:</b> Size of the downloaded file in bytes, with pre-defined possible values: <b>"50kb"</b>, <b>"1mb"</b>, or <b>"5mb"</b>.</li>
+<li><b>filesize:</b> Size of the completely or partially downloaded file in bytes, with pre-defined possible values: <b>"50kb"</b>, <b>"1mb"</b>, or <b>"5mb"</b>.</li>
 </ul>
 
 <h4>Columns</h4>
@@ -559,9 +560,9 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
 <li><b>filesize:</b> Size of the downloaded file in bytes.</li>
 <li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements.</li>
 <li><b>server:</b> Either <b>"public"</b> if the request was made to a server on the public internet, or <b>"onion"</b> if the request was made to an onion server.</li>
-<li><b>q1:</b> First quartile of time in milliseconds until receiving the last byte.</li>
-<li><b>md:</b> Median of time in milliseconds until receiving the last byte.</li>
-<li><b>q3:</b> Third quartile of time in milliseconds until receiving the last byte.</li>
+<li><b>q1:</b> First quartile of time in milliseconds until receiving the number of bytes in <b>filesize</b>.</li>
+<li><b>md:</b> Median of time in milliseconds until receiving the number of bytes in <b>filesize</b>.</li>
+<li><b>q3:</b> Third quartile of time in milliseconds until receiving the number of bytes in <b>filesize</b>.</li>
 </ul>
 
 <h3>Timeouts and failures of downloading files over Tor
diff --git a/src/main/sql/onionperf/init-onionperf.sql b/src/main/sql/onionperf/init-onionperf.sql
index b6b226b..ebe9f77 100644
--- a/src/main/sql/onionperf/init-onionperf.sql
+++ b/src/main/sql/onionperf/init-onionperf.sql
@@ -17,6 +17,15 @@ CREATE TABLE IF NOT EXISTS measurements (
   writebytes INTEGER,
   readbytes INTEGER,
   didtimeout BOOLEAN,
+  partial10240 INTEGER,
+  partial20480 INTEGER,
+  partial51200 INTEGER,
+  partial102400 INTEGER,
+  partial204800 INTEGER,
+  partial512000 INTEGER,
+  partial1048576 INTEGER,
+  partial2097152 INTEGER,
+  partial5242880 INTEGER,
   dataperc0 INTEGER,
   dataperc10 INTEGER,
   dataperc20 INTEGER,
@@ -53,6 +62,54 @@ CREATE TABLE IF NOT EXISTS buildtimes (
 
 CREATE TYPE server AS ENUM ('public', 'onion');
 
+CREATE OR REPLACE VIEW onionperf_including_partials AS
+WITH measurements_including_partials AS (
+  SELECT start,
+    filesize,
+    source,
+    endpointremote,
+    datacomplete
+  FROM measurements
+  UNION
+  SELECT start,
+    51200 AS filesize,
+    source,
+    endpointremote,
+    partial51200 AS datacomplete
+  FROM measurements
+  WHERE filesize > 51200
+  AND partial51200 IS NOT NULL
+  UNION
+  SELECT start,
+    1048576 AS filesize,
+    source,
+    endpointremote,
+    partial1048576 AS datacomplete
+  FROM measurements
+  WHERE filesize > 1048576
+  AND partial1048576 IS NOT NULL
+), grouped AS (
+  SELECT DATE(start) AS date,
+    filesize,
+    source,
+    CASE WHEN endpointremote LIKE '%.onion:%' THEN 'onion'
+      ELSE 'public' END AS server,
+    CASE WHEN COUNT(*) > 0 THEN
+      PERCENTILE_CONT(ARRAY[0.25,0.5,0.75]) WITHIN GROUP(ORDER BY datacomplete)
+      ELSE NULL END AS q
+  FROM measurements_including_partials
+  GROUP BY date, filesize, source, server
+)
+SELECT date,
+  filesize,
+  source,
+  server,
+  CASE WHEN q IS NULL THEN NULL ELSE TRUNC(q[1]) END AS q1,
+  CASE WHEN q IS NULL THEN NULL ELSE TRUNC(q[2]) END AS md,
+  CASE WHEN q IS NULL THEN NULL ELSE TRUNC(q[3]) END AS q3
+FROM grouped
+ORDER BY date, filesize, source, server;
+
 CREATE OR REPLACE VIEW onionperf AS
 SELECT date,
   filesize,



More information about the tor-commits mailing list