[tor-commits] [metrics-web/master] Make suggested changes to per-graph CSV files.

karsten at torproject.org karsten at torproject.org
Wed Aug 15 10:53:22 UTC 2018


commit 0e6936003d0b9b4cc8d9388ea8a56eadcfdce1f9
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Aug 9 16:08:17 2018 +0200

    Make suggested changes to per-graph CSV files.
    
    Implements #26998.
---
 src/main/R/clients/split-clients.R    |   6 +-
 src/main/R/rserver/graphs.R           |  70 ++++++----------
 src/main/resources/web/jsps/stats.jsp | 153 ++++++++--------------------------
 3 files changed, 64 insertions(+), 165 deletions(-)

diff --git a/src/main/R/clients/split-clients.R b/src/main/R/clients/split-clients.R
index 50b03d4..9f80902 100644
--- a/src/main/R/clients/split-clients.R
+++ b/src/main/R/clients/split-clients.R
@@ -1,12 +1,12 @@
 dir.create("RData", showWarnings = FALSE)
 
 c <- read.csv("clients.csv", stringsAsFactors = FALSE)
-data <- c[c$node == 'relay', !(names(c) %in% c("node", "frac"))]
+data <- c[c$node == 'relay', !(names(c) %in% c("node"))]
 save(data, file = "RData/clients-relay.RData")
-data <- c[c$node == 'bridge', !(names(c) %in% c("node", "frac"))]
+data <- c[c$node == 'bridge', !(names(c) %in% c("node"))]
 save(data, file = "RData/clients-bridge.RData")
 
 u <- read.csv("userstats-combined.csv", stringsAsFactors = FALSE)
-data <- u[, !(names(u) %in% c("node", "version", "frac"))]
+data <- u[, !(names(u) %in% c("node", "version"))]
 save(data, file = "RData/userstats-bridge-combined.RData")
 
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index 12a80e9..ab37a32 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -423,7 +423,6 @@ plot_versions <- function(start_p, end_p, path_p) {
 
 write_versions <- function(start_p = NULL, end_p = NULL, path_p) {
   prepare_versions(start_p, end_p) %>%
-    spread(key = "version", value = "relays", fill = 0) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -459,6 +458,7 @@ plot_platforms <- function(start_p, end_p, path_p) {
 
 write_platforms <- function(start_p = NULL, end_p = NULL, path_p) {
   prepare_platforms(start_p, end_p) %>%
+    mutate(platform = tolower(platform)) %>%
     spread(platform, relays) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
@@ -607,8 +607,6 @@ plot_relayflags <- function(start_p, end_p, flag_p, path_p) {
 write_relayflags <- function(start_p = NULL, end_p = NULL, flag_p = NULL,
     path_p) {
   prepare_relayflags(start_p, end_p, flag_p) %>%
-    mutate(flag = tolower(flag)) %>%
-    spread(flag, relays) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -832,9 +830,6 @@ plot_connbidirect <- function(start_p, end_p, path_p) {
 write_connbidirect <- function(start_p = NULL, end_p = NULL, path_p) {
   prepare_connbidirect(start_p, end_p) %>%
     rename(q1 = X0.25, md = X0.5, q3 = X0.75) %>%
-    gather(variable, value, -(date:direction)) %>%
-    unite(temp, direction, variable) %>%
-    spread(temp, value) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -900,7 +895,8 @@ plot_userstats <- function(start_p, end_p, node_p, variable_p, value_p,
     events_p, path_p) {
   load(paste(rdata_dir, "clients-", node_p, ".RData", sep = ""))
   c <- data
-  u <- c[c$date >= start_p & c$date <= end_p, ]
+  u <- c[c$date >= start_p & c$date <= end_p, c("date", "country", "transport",
+      "version", "lower", "upper", "clients")]
   u <- rbind(u, data.frame(date = start_p,
       country = ifelse(variable_p == "country" & value_p != "all", value_p, ""),
       transport = ifelse(variable_p == "transport", value_p, ""),
@@ -1053,8 +1049,7 @@ write_userstats_relay_country <- function(start_p = NULL, end_p = NULL,
       country == ifelse(country_p == "all", "", country_p) else TRUE) %>%
     filter(transport == "") %>%
     filter(version == "") %>%
-    mutate(downturns = clients < lower, upturns = clients > upper) %>%
-    select(date, country, clients, downturns, upturns, lower, upper) %>%
+    select(date, country, clients, lower, upper, frac) %>%
     rename(users = clients) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
@@ -1069,7 +1064,7 @@ write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
       country == ifelse(country_p == "all", "", country_p) else TRUE) %>%
     filter(transport == "") %>%
     filter(version == "") %>%
-    select(date, country, clients) %>%
+    select(date, country, clients, frac) %>%
     rename(users = clients) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
@@ -1083,24 +1078,21 @@ write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
     filter(country == "") %>%
     filter(version == "") %>%
     filter(transport != "") %>%
-    select(date, transport, clients)
+    select(date, transport, clients, frac)
   if (is.null(transport_p) || "!<OR>" %in% transport_p) {
     n <- u %>%
       filter(transport != "<OR>") %>%
-      group_by(date) %>%
+      group_by(date, frac) %>%
       summarize(clients = sum(clients))
     u <- rbind(u, data.frame(date = n$date, transport = "!<OR>",
-                             clients = n$clients))
+                             clients = n$clients, frac = n$frac))
   }
   u %>%
     filter(if (!is.null(transport_p)) transport %in% transport_p else TRUE) %>%
-    mutate(transport = ifelse(transport == "<OR>", "default_or_protocol",
-      ifelse(transport == "!<OR>", "any_pt",
-      ifelse(transport == "<??>", "unknown_pluggable_transports",
-      transport)))) %>%
     group_by(date, transport) %>%
-    select(date, transport, clients) %>%
-    spread(transport, clients) %>%
+    select(date, transport, clients, frac) %>%
+    rename(users = clients) %>%
+    arrange(date, transport) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -1113,7 +1105,7 @@ write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
     filter(country == "") %>%
     filter(transport == "") %>%
     filter(if (!is.null(version_p)) version == version_p else TRUE) %>%
-    select(date, version, clients) %>%
+    select(date, version, clients, frac) %>%
     rename(users = clients) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
@@ -1159,12 +1151,8 @@ write_userstats_bridge_combined <- function(start_p = NULL, end_p = NULL,
     write_userstats_bridge_country(start_p, end_p, country_p, path_p)
   } else {
     prepare_userstats_bridge_combined(start_p, end_p, country_p) %>%
-      select(date, country, transport, low, high) %>%
-      mutate(transport = ifelse(transport == "<OR>", "default_or_protocol",
-        ifelse(transport == "<??>", "unknown_transport", transport))) %>%
-      gather(variable, value, -(date:transport)) %>%
-      unite(temp, transport, variable) %>%
-      spread(temp, value) %>%
+      select(date, country, transport, low, high, frac) %>%
+      arrange(date, country, transport) %>%
       write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
   }
 }
@@ -1201,8 +1189,8 @@ plot_advbwdist_perc <- function(start_p, end_p, p_p, path_p) {
 write_advbwdist_perc <- function(start_p = NULL, end_p = NULL, p_p = NULL,
     path_p) {
   prepare_advbwdist_perc(start_p, end_p, p_p) %>%
-    unite(temp, variable, percentile) %>%
-    spread(temp, advbw) %>%
+    spread(variable, advbw) %>%
+    rename(p = percentile) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -1238,8 +1226,8 @@ plot_advbwdist_relay <- function(start_p, end_p, n_p, path_p) {
 write_advbwdist_relay <- function(start_p = NULL, end_p = NULL, n_p = NULL,
     path_p) {
   prepare_advbwdist_relay(start_p, end_p, n_p) %>%
-    unite(temp, variable, relay) %>%
-    spread(temp, advbw) %>%
+    spread(variable, advbw) %>%
+    rename(n = relay) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
@@ -1249,7 +1237,7 @@ prepare_hidserv_dir_onions_seen <- function(start_p, end_p) {
     filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
     filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
     filter(type == "dir-onions-seen") %>%
-    transmute(date = date, onions = ifelse(frac >= 0.01, wiqm, NA))
+    transmute(date, onions = ifelse(frac >= 0.01, wiqm, NA), frac)
 }
 
 plot_hidserv_dir_onions_seen <- function(start_p, end_p, path_p) {
@@ -1277,7 +1265,7 @@ prepare_hidserv_rend_relayed_cells <- function(start_p, end_p) {
     filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
     filter(type == "rend-relayed-cells") %>%
     transmute(date,
-      relayed = ifelse(frac >= 0.01, wiqm * 8 * 512 / (86400 * 1e9), NA))
+      relayed = ifelse(frac >= 0.01, wiqm * 8 * 512 / (86400 * 1e9), NA), frac)
 }
 
 plot_hidserv_rend_relayed_cells <- function(start_p, end_p, path_p) {
@@ -1440,22 +1428,14 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
 # plot_webstats_tb_locale needs the preliminary data frame e for its
 # breaks and labels. Left as future work.
 write_webstats_tb_locale <- function(start_p = NULL, end_p = NULL, path_p) {
-  d <- read.csv(paste(stats_dir, "webstats.csv", sep = ""),
-    colClasses = c("log_date" = "Date", "locale" = "character"))
-  d <- d %>%
+  read.csv(paste(stats_dir, "webstats.csv", sep = ""),
+    colClasses = c("log_date" = "Date", "locale" = "character")) %>%
     filter(if (!is.null(start_p)) log_date >= as.Date(start_p) else TRUE) %>%
     filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
-    filter(request_type == "tbid")
-  e <- d
-  e <- aggregate(list(count = e$count), by = list(locale = e$locale), FUN = sum)
-  e <- e[order(e$count, decreasing = TRUE), ]
-  e <- e[1:5, ]
-  d <- aggregate(list(count = d$count), by = list(log_date = d$log_date,
-    locale = ifelse(d$locale %in% e$locale, d$locale, "other")), FUN = sum)
-  d %>%
-    mutate(locale = tolower(locale)) %>%
+    filter(request_type == "tbid") %>%
+    group_by(log_date, locale) %>%
+    summarize(initial_downloads = sum(count)) %>%
     rename(date = log_date) %>%
-    spread(locale, count) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
diff --git a/src/main/resources/web/jsps/stats.jsp b/src/main/resources/web/jsps/stats.jsp
index 1c18921..719176c 100644
--- a/src/main/resources/web/jsps/stats.jsp
+++ b/src/main/resources/web/jsps/stats.jsp
@@ -45,7 +45,7 @@ https://metrics.torproject.org/identifier.csv
 <li><b>February 28, 2018:</b> Added per-graph CSV files to eventually replace pre-aggregated CSV files.</li>
 <li><b>May 29, 2018:</b> Made all parameters of per-graph CSV files optional to support providing both pre-filtered and complete data sets.</li>
 <li><b>July 31, 2018:</b> Announced pending changes to per-graph CSV files to become effective on August 15 and pre-aggregated CSV files to be removed by September 15.</li>
-<li><b>August 15, 2018 (scheduled):</b> Make the first batch of changes to per-graph CSV files (marked as "Suggested change" below).</li>
+<li><b>August 15, 2018:</b> Made the first batch of changes to per-graph CSV files.</li>
 <li><b>September 15, 2018 (scheduled):</b> Remove all pre-aggregated CSV files.</li>
 </ul>
 
@@ -75,22 +75,11 @@ Users <a href="#users" name="users" class="anchor">#</a></h2>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are estimated.</li>
 <li><b>country:</b> Two-letter lower-case country code as found in a GeoIP database by resolving clients' IP addresses, or <b>"??"</b> if client IP addresses could not be resolved. If this column contains the empty string, all clients are included, regardless of their country code.</li>
 <li><b>users:</b> Estimated number of clients.</li>
-<li><b>downturns:</b> Whether the estimated number of clients is below the lower number of expected clients, indicating a possible censorship-related event. If this column contains the empty string, there are no expectations on the number of clients.</li>
-<li><b>upturns:</b> Whether the estimated number of clients is above the upper number of expected clients, indicating a possible censorship-related event. If this column contains the empty string, there are no expectations on the number of clients.</li>
-<li><b>lower:</b> Lower number of expected clients under the assumption that there has been no censorship event. If this column contains the empty string, there are no expectations on the number of clients.</li>
-<li><b>upper:</b> Upper number of expected clients under the assumption that there has been no release of censorship. If this column contains the empty string, there are no expectations on the number of clients.</li>
+<li><b>lower:</b> Lower number of expected clients under the assumption that there has been no censorship event. If <b>users &lt; lower</b>, a censorship-related event might have happened in this country on the given day. If this column contains the empty string, there are no expectations on the number of clients.</li>
+<li><b>upper:</b> Upper number of expected clients under the assumption that there has been no release of censorship. If <b>users &gt; upper</b>, a censorship-related event might have happened in this country on the given day. If this column contains the empty string, there are no expectations on the number of clients.</li>
+<li><b>frac:</b> Fraction of relays in percent that the estimate is based on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Remove the <b>downturns</b> and <b>upturns</b> columns which are trivial to compute as <b>users < lower</b> and <b>users > upper</b>, and which don't necessarily make the CSV file easier to handle. There could even be a gentle hint on computing the dots in the graph from two columns.</p>
-</div>
-
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of relays (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Bridge users by country
 <a href="/userstats-bridge-country.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/userstats-bridge-country.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -110,13 +99,9 @@ Users <a href="#users" name="users" class="anchor">#</a></h2>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are estimated.</li>
 <li><b>country:</b> Two-letter lower-case country code as found in a GeoIP database by resolving clients' IP addresses, or <b>"??"</b> if client IP addresses could not be resolved. If this column contains the empty string, all clients are included, regardless of their country code.</li>
 <li><b>users:</b> Estimated number of clients.</li>
+<li><b>frac:</b> Fraction of bridges in percent that the estimate is based on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of bridges (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Bridge users by transport
 <a href="/userstats-bridge-transport.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/userstats-bridge-transport.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -134,19 +119,11 @@ Users <a href="#users" name="users" class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are estimated.</li>
-<li><b>$transport:</b> One or more columns with the estimated number of clients using transport with lower-case name <b>$transport</b> to connect to the Tor network using bridges. Examples for transport names are <b>"obfs4"</b>, <b>"websocket"</b> for Flash proxy/websocket, <b>"fte"</b> for FTE, <b>"any_pt"</b> for any pluggable transport, <b>"unknown_pluggable_transports"</b> for unknown pluggable transport(s), or <b>"default_or_protocol"</b> for the default OR protocol.</li>
+<li><b>transport:</b> Transport name used by clients to connect to the Tor network using bridges. Examples are <b>"obfs4"</b>, <b>"websocket"</b> for Flash proxy/websocket, <b>"fte"</b> for FTE, <b>"!&lt;OR&gt;"</b> for any pluggable transport, <b>"&lt;??&gt;"</b> for unknown pluggable transport(s), or <b>"&lt;OR&gt;"</b> for the default OR protocol.</li>
+<li><b>users:</b> Estimated number of clients.</li>
+<li><b>frac:</b> Fraction of bridges in percent that the estimate is based on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>$transport</b> by a <b>transport</b> column for the transport name (written as non-percent-encoded !<OR>, <??>, and <OR> for consistency with the <b>transport</b> parameter) and a <b>users</b> column for the estimated number of clients, similar to the bridge users by country graph.</p>
-</div>
-
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of bridges (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Bridge users by country and transport
 <a href="/userstats-bridge-combined.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/userstats-bridge-combined.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -165,20 +142,12 @@ Users <a href="#users" name="users" class="anchor">#</a></h2>
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are estimated.</li>
 <li><b>country:</b> Two-letter lower-case country code as found in a GeoIP database by resolving clients' IP addresses, or <b>"??"</b> if client IP addresses could not be resolved.</li>
-<li><b>$transport_high:</b> Upper bound of estimated users from the given country and transport. Transport names are written in lower case, and the default OR protocol is written as <b>default_or_protocol</b>.</li>
-<li><b>$transport_low:</b> Lower bound of estimated users from the given country and transport. Transport names are written in lower case, and the default OR protocol is written as <b>default_or_protocol</b>.</li>
+<li><b>transport:</b> Transport name used by clients to connect to the Tor network using bridges. Examples are <b>"obfs4"</b>, <b>"websocket"</b> for Flash proxy/websocket, <b>"fte"</b> for FTE, <b>"&lt;??&gt;"</b> for unknown pluggable transport(s), or <b>"&lt;OR&gt;"</b> for the default OR protocol.</li>
+<li><b>high:</b> Upper bound of estimated users from the given country and transport.</li>
+<li><b>low:</b> Lower bound of estimated users from the given country and transport.</li>
+<li><b>frac:</b> Fraction of bridges in percent that the estimate is based on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>$transport_high</b> and <b>$transport_low</b> by a <b>transport</b> column for the transport name (written as non-percent-encoded <OR> for consistency with the previous graph) and a <b>high</b> and a <b>low</b> column for the upper and lower bound.</p>
-</div>
-
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of bridges (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Bridge users by IP version
 <a href="/userstats-bridge-version.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/userstats-bridge-version.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -199,15 +168,9 @@ using bridges, which can be either <b>"v4"</b> or <b>"v6"</b>.</li>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which user numbers are estimated.</li>
 <li><b>version:</b> IP version used by clients to connect to the Tor network using bridges, which can be either <b>"v4"</b> or <b>"v6"</b>. If this column contains the empty string, all clients are included, regardless of their IP version.</li>
 <li><b>users:</b> Estimated number of clients.</li>
+<li><b>frac:</b> Fraction of bridges in percent that the estimate is based on.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column ("Fraction of bridges (as value between 0 and 1) that the estimate is based on.") which might be relevant for pro users. Related to the discussion on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
-</div>
-
 <div class="container">
 <h2><i class="fa fa-server fa-fw" aria-hidden="true"></i>
 Servers <a href="#servers" name="servers" class="anchor">#</a></h2>
@@ -249,14 +212,10 @@ Servers <a href="#servers" name="servers" class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as running.</li>
-<li><b>$flag:</b> Average number of relays with the given relay flag in lower case, which can be <b>"exit"</b>, <b>"fast"</b>, <b>"guard"</b>, <b>"hsdir"</b>, <b>"fast"</b>, <b>"running"</b>, and <b>"stable"</b>.</li>
+<li><b>flag:</b> Relay flag, which can be <b>"Exit"</b>, <b>"Fast"</b>, <b>"Guard"</b>, <b>"HSDir"</b>, <b>"Running"</b>, or <b>"Stable"</b>.</li>
+<li><b>relays:</b> Average number of relays.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>$flag</b> columns by a <b>flag</b> and a <b>relays</b> column, and include the relay flag name in their original capitalization, rather than lower-cased.</p>
-</div>
-
 <h3>Relays by tor version
 <a href="/versions.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/versions.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -273,14 +232,10 @@ Servers <a href="#servers" name="servers" class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as running.</li>
-<li><b>$version:</b> Average number of relays with the given first three dotted numbers of the Tor software version as reported by the relay. An example is <b>"0.3.4"</b>.
+<li><b>version:</b> First three dotted numbers of the Tor software version as reported by the relay. An example is <b>"0.3.4"</b>.</li>
+<li><b>relays:</b> Average number of relays.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>$version</b> columns by a <b>version</b> and a <b>relays</b> column.</p>
-</div>
-
 <h3>Relays by platform
 <a href="/platforms.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/platforms.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -297,18 +252,13 @@ Servers <a href="#servers" name="servers" class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as running.</li>
-<li><b>BSD:</b> Average number of relays on *BSD.</li>
-<li><b>Linux:</b> Average number of relays on Linux.</li>
-<li><b>Other:</b> Average number of relays on another platform than Linux, *BSD, Windows, or macOS.</li>
-<li><b>Windows:</b> Average number of relays on Windows.</li>
-<li><b>macOS:</b> Average number of relays on macOS.</li>
+<li><b>bsd:</b> Average number of relays on *BSD.</li>
+<li><b>linux:</b> Average number of relays on Linux.</li>
+<li><b>macos:</b> Average number of relays on macOS.</li>
+<li><b>other:</b> Average number of relays on another platform than Linux, *BSD, Windows, or macOS.</li>
+<li><b>windows:</b> Average number of relays on Windows.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Change platform-specific columns to be all lower-case as a good practice to only use lower-cased column names everywhere.</p>
-</div>
-
 <h3>Relays by IP version
 <a href="/relays-ipv6.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/relays-ipv6.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -440,14 +390,9 @@ Traffic <a href="#traffic" name="traffic" class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) that relays reported bandwidth data for.</li>
-<li><b>all_$p:</b> Advertised bandwidth in Gbit/s of the p-th percentile of all relays.</li>
-<li><b>exits_$p:</b> Advertised bandwidth in Gbit/s of the p-th percentile of relays with the <b>"Exit"</b> relay flag.</li>
-</ul>
-
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>all_p$</b> and <b>exits_$p</b> columns by three columns <b>p</b>, <b>all</b>, and <b>exit</b>.</p>
-</div>
+<li><b>p:</b> Percentile as value between 0 and 100.</li>
+<li><b>all:</b> Advertised bandwidth in Gbit/s of the p-th percentile of all relays.</li>
+<li><b>exits:</b> Advertised bandwidth in Gbit/s of the p-th percentile of relays with the <b>"Exit"</b> relay flag.</li></ul>
 
 <h3>Advertised bandwidth of n-th fastest relays
 <a href="/advbwdist-relay.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
@@ -466,15 +411,11 @@ Traffic <a href="#traffic" name="traffic" class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) that relays reported bandwidth data for.</li>
-<li><b>all_$n:</b> Advertised bandwidth in Gbit/s of n-th fastest relay.</li>
-<li><b>exits_$n:</b> Advertised bandwidth in Gbit/s of n-th fastest relay with the <b>"Exit"</b> relay flag.</li>
+<li><b>n:</b> Position of the relay in an ordered list of all advertised bandwidths, starting at 1 for the fastest relay in the network.</li>
+<li><b>all:</b> Advertised bandwidth in Gbit/s of n-th fastest relay.</li>
+<li><b>exits:</b> Advertised bandwidth in Gbit/s of n-th fastest relay with the <b>"Exit"</b> relay flag.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace <b>all_n$</b> and <b>exits_$n</b> columns by three columns <b>n</b>, <b>all</b>, and <b>exit</b>.</p>
-</div>
-
 <h3>Consumed bandwidth by Exit/Guard flag combination
 <a href="/bwhist-flags.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/bwhist-flags.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -533,22 +474,12 @@ Traffic <a href="#traffic" name="traffic" class="anchor">#</a></h2>
 
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) for which statistics on uni-/bidirectional connection usage were reported.</li>
-<li><b>both_md:</b> Median of fraction of connections classified as both reading and writing.</li>
-<li><b>both_q1:</b> First quartile of fraction of connections classified as both reading and writing.</li>
-<li><b>both_q3:</b> Third quartile of fraction of connections classified as both reading and writing.</li>
-<li><b>read_md:</b> Median of fraction of connections classified as mostly reading.</li>
-<li><b>read_q1:</b> First quartile of fraction of connections classified as mostly reading.</li>
-<li><b>read_q3:</b> Third quartile of fraction of connections classified as mostly reading.</li>
-<li><b>write_md:</b> Median of fraction of connections classified as mostly writing.</li>
-<li><b>write_q1:</b> First quartile of fraction of connections classified as mostly writing.</li>
-<li><b>write_q3:</b> Third quartile of fraction of connections classified as mostly writing.</li>
+<li><b>direction:</b> Direction of reported fraction, which can be <b>"read"</b>, <b>"write"</b>, or <b>"both"</b> for connections classified as "mostly reading", "mostly writing", or "both reading and writing". Connections below the threshold have been removed from this statistics file entirely.</li>
+<li><b>q1:</b> First quartile of fraction of connections.</li>
+<li><b>md:</b> Median of fraction of connections.</li>
+<li><b>q3:</b> Third quartile of fraction of connections.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace columns except <b>date</b> by four columns <b>direction</b>, <b>q1</b>, <b>md</b>, and <b>q3</b>.</p>
-</div>
-
 </div>
 
 <div class="container">
@@ -679,13 +610,9 @@ Onion Services <a href="#onion-services" name="servers" class="anchor">#</a></h2
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as running.</li>
 <li><b>onions:</b> Estimated number of unique .onion addresses observed by onion-service directories.</li>
+<li><b>frac:</b> Total network fraction of statistics reported by onion-service directories.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column as suggested on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Onion-service traffic (versions 2 and 3)
 <a href="/hidserv-rend-relayed-cells.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/hidserv-rend-relayed-cells.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -703,13 +630,9 @@ Onion Services <a href="#onion-services" name="servers" class="anchor">#</a></h2
 <ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when relays have been listed as running.</li>
 <li><b>relayed:</b> Estimated bandwidth in Gbit/s relayed on rendezvous circuits as observed by rendezvous points.</li>
+<li><b>frac:</b> Total network fraction of statistics reported by rendezvous points.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Add <b>frac</b> column as suggested on <a href="https://bugs.torproject.org/26950">#26950</a>.</p>
-</div>
-
 <h3>Fraction of relays reporting onion-service statistics
 <a href="/hidserv-frac-reporting.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/hidserv-frac-reporting.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>
@@ -800,14 +723,10 @@ Applications <a href="#applications" name="applications" class="anchor">#</a></h
 
 </ul>
 <li><b>date:</b> UTC date (YYYY-MM-DD) when requests to <code>torproject.org</code> web servers have been logged.</li>
-<li><b>$locale:</b> Number of Tor Browser initial downloads for the given locale; limited to the top-5 locales in the requested time period.</li>
+<li><b>locale:</b> Locale, like "en-US" for English (United States), "de" for German, etc., and "??" for unrecognized locales.</li>
+<li><b>initial_downloads:</b> Number of Tor Browser initial downloads.</li>
 </ul>
 
-<div class="bs-callout bs-callout-warning">
-<h3>Suggested change</h3>
-<p>Replace all locale-specific columns by two columns <b>locale</b> and <b>count</b> to avoid dynamically changing columns. Maybe also take out the limitation to top-5 locales in the file (not the graph), similar to how the "Bridge users by country and transport" file contains all transports, not just the top-3 ones.</p>
-</div>
-
 <h3>Tor Messenger downloads and updates
 <a href="/webstats-tm.html" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> graph</a>
 <a href="/webstats-tm.csv" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> data</a>





More information about the tor-commits mailing list