[tor-commits] [metrics-web/master] Add #8462 graphs.

karsten at torproject.org karsten at torproject.org
Mon May 6 18:38:24 UTC 2013


commit b50aabcced4b363c8fff1c8e8062e471af59735a
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon May 6 20:36:34 2013 +0200

    Add #8462 graphs.
---
 etc/web.xml                                        |   12 ++
 rserve/graphs.R                                    |   74 +++++++++
 .../ernie/web/graphs/GraphParameterChecker.java    |   75 +++++-----
 .../ernie/web/graphs/RObjectGenerator.java         |    8 +
 web/WEB-INF/users.jsp                              |  167 ++++++++++++++++++++
 5 files changed, 300 insertions(+), 36 deletions(-)

diff --git a/etc/web.xml b/etc/web.xml
index 79ac4c1..630af3e 100644
--- a/etc/web.xml
+++ b/etc/web.xml
@@ -197,6 +197,18 @@
     <url-pattern>/almost-fast-exits.png</url-pattern>
     <url-pattern>/almost-fast-exits.pdf</url-pattern>
     <url-pattern>/almost-fast-exits.svg</url-pattern>
+    <url-pattern>/userstats-relay-country.png</url-pattern>
+    <url-pattern>/userstats-relay-country.pdf</url-pattern>
+    <url-pattern>/userstats-relay-country.svg</url-pattern>
+    <url-pattern>/userstats-bridge-country.png</url-pattern>
+    <url-pattern>/userstats-bridge-country.pdf</url-pattern>
+    <url-pattern>/userstats-bridge-country.svg</url-pattern>
+    <url-pattern>/userstats-bridge-transport.png</url-pattern>
+    <url-pattern>/userstats-bridge-transport.pdf</url-pattern>
+    <url-pattern>/userstats-bridge-transport.svg</url-pattern>
+    <url-pattern>/userstats-bridge-version.png</url-pattern>
+    <url-pattern>/userstats-bridge-version.pdf</url-pattern>
+    <url-pattern>/userstats-bridge-version.svg</url-pattern>
   </servlet-mapping>
 
   <servlet>
diff --git a/rserve/graphs.R b/rserve/graphs.R
index 72bbc1e..b1ffd86 100644
--- a/rserve/graphs.R
+++ b/rserve/graphs.R
@@ -1022,3 +1022,77 @@ plot_bandwidth_flags <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 72)
 }
 
+plot_userstats <- function(start, end, node, variable, value, path) {
+  end <- min(end, as.character(Sys.Date()))
+  u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/",
+    "task-8462/userstats.csv", sep = ""),
+    stringsAsFactors = FALSE)
+  if (node == 'relay') {
+    if (value != 'all') {
+      u <- u[u$country == value & u$node == 'relay', ]
+      title <- paste("Directly connecting users from ",
+                     countryname(value), " (BETA)\n", sep = "")
+    } else {
+      u <- u[u$country == '' & u$transport == '' & u$version == '' &
+             u$node == 'relay', ]
+      title <- "Directly connecting users (BETA)\n"
+    }
+  } else if (variable == 'transport') {
+    u <- u[u$transport == value & u$node == 'bridge', ]
+    title <- paste("Bridge users using transport ", value, " (BETA)\n",
+                   sep = "")
+  } else if (variable == 'version') {
+    u <- u[u$version== value & u$node == 'bridge', ]
+    title <- paste("Bridge users using IP", value, " (BETA)\n", sep = "")
+  } else {
+    if (value != 'all') {
+      u <- u[u$country == value & u$node == 'bridge', ]
+      title <- paste("Bridge users from ", countryname(value),
+                     " (BETA)\n", sep = "")
+    } else {
+      u <- u[u$country == '' & u$transport == '' & u$version == '' &
+            u$node == 'bridge', ]
+      title <- "Bridge users (BETA)\n"
+    }
+  }
+  u <- data.frame(date = as.Date(u$date, "%Y-%m-%d"), users = u$users)
+  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
+      to = as.Date(end, "%Y-%m-%d"), by="1 day")
+  missing <- setdiff(dates, u$date)
+  if (length(missing) > 0) {
+    u <- rbind(u,
+        data.frame(date = as.Date(missing, origin = "1970-01-01"),
+        users = NA))
+  }
+  formatter <- function(x, ...) { format(x, scientific = FALSE, ...) }
+  date_breaks <- date_breaks(
+    as.numeric(max(u$date) - min(u$date)))
+  ggplot(u, aes(x = date, y = users)) +
+    geom_line(size = 1) +
+    scale_x_date(name = paste("\nThe Tor Project - ",
+        "https://metrics.torproject.org/", sep = ""),
+        format = date_breaks$format, major = date_breaks$major,
+        minor = date_breaks$minor) +
+    scale_y_continuous(name = "", limits = c(0,
+        ifelse(length(na.omit(u$users)) == 0, 0,
+        max(u$users, na.rm = TRUE))), formatter = formatter) +
+    opts(title = title)
+  ggsave(filename = path, width = 8, height = 5, dpi = 72)
+}
+
+plot_userstats_relay_country <- function(start, end, country, path) {
+  plot_userstats(start, end, 'relay', 'country', country, path)
+}
+
+plot_userstats_bridge_country <- function(start, end, country, path) {
+  plot_userstats(start, end, 'bridge', 'country', country, path)
+}
+
+plot_userstats_bridge_transport <- function(start, end, transport, path) {
+  plot_userstats(start, end, 'bridge', 'transport', transport, path)
+}
+
+plot_userstats_bridge_version <- function(start, end, version, path) {
+  plot_userstats(start, end, 'bridge', 'version', version, path)
+}
+
diff --git a/src/org/torproject/ernie/web/graphs/GraphParameterChecker.java b/src/org/torproject/ernie/web/graphs/GraphParameterChecker.java
index 8300be5..74ca6f9 100644
--- a/src/org/torproject/ernie/web/graphs/GraphParameterChecker.java
+++ b/src/org/torproject/ernie/web/graphs/GraphParameterChecker.java
@@ -56,9 +56,11 @@ public class GraphParameterChecker {
     }
     this.knownParameterValues.put("country", sb.toString());
     this.knownParameterValues.put("events", "on,off,points");
-    this.knownParameterValues.put("language", "all,en,zh_CN,fa");
     this.knownParameterValues.put("source", "all,siv,moria,torperf");
     this.knownParameterValues.put("filesize", "50kb,1mb,5mb");
+    this.knownParameterValues.put("transport",
+        "obfs2,obfs3,websocket,<OR>,<??>");
+    this.knownParameterValues.put("version", "v4,v6");
   }
 
   public void setAvailableGraphs(Map<String, String> availableGraphs) {
@@ -187,26 +189,6 @@ public class GraphParameterChecker {
       recognizedGraphParameters.put("events", eventsParameter);
     }
 
-    /* Parse language if supported by the graph type. Only a single
-     * language can be passed. If no language is passed, use "all" as
-     * default. */
-    if (supportedGraphParameters.contains("language")) {
-      String[] languageParameter = (String[]) requestParameters.get(
-          "language");
-      List<String> knownBundles = Arrays.asList(
-          this.knownParameterValues.get("language").split(","));
-      if (languageParameter != null) {
-        if (languageParameter.length != 1 ||
-            languageParameter[0].length() == 0 ||
-            !knownBundles.contains(languageParameter[0])) {
-          return null;
-        }
-      } else {
-        languageParameter = new String[] { "all" };
-      }
-      recognizedGraphParameters.put("language", languageParameter);
-    }
-
     /* Parse torperf data source if supported by the graph type. Only a
      * single source can be passed. If no source is passed, use "torperf"
      * as default. */
@@ -251,23 +233,44 @@ public class GraphParameterChecker {
       recognizedGraphParameters.put("filesize", filesizeParameter);
     }
 
-    /* Parse fingerprint if supported/required by the graph type. Make
-     * sure the fingerprint contains only hexadecimal characters and is 40
-     * characters long. Fail if no fingerprint is provided! */
-    if (supportedGraphParameters.contains("fingerprint")) {
-      String[] fingerprintParameter = (String[]) requestParameters.get(
-          "fingerprint");
-      if (fingerprintParameter == null ||
-          fingerprintParameter.length != 1 ||
-          fingerprintParameter[0] == null ||
-          !Pattern.matches("[0-9a-f]{40}",
-          fingerprintParameter[0].toLowerCase())) {
-        return null;
+    /* Parse transports if supported by the graph type. If no transports
+     * are passed, use "<OR>" as default. */
+    if (supportedGraphParameters.contains("transport")) {
+      String[] transportParameters = (String[]) requestParameters.get(
+          "transport");
+      List<String> knownTransports = Arrays.asList(
+          this.knownParameterValues.get("transport").split(","));
+      if (transportParameters != null) {
+        for (String transport : transportParameters) {
+          if (transport == null || transport.length() == 0 ||
+              !knownTransports.contains(transport)) {
+            return null;
+          }
+        }
+      } else {
+        transportParameters = new String[] { "<OR>" };
+      }
+      recognizedGraphParameters.put("transport", transportParameters);
+    }
+
+    /* Parse versions if supported by the graph type. If no versions
+     * are passed, use "v4" as default. */
+    if (supportedGraphParameters.contains("version")) {
+      String[] versionParameters = (String[]) requestParameters.get(
+          "version");
+      List<String> knownVersions = Arrays.asList(
+          this.knownParameterValues.get("version").split(","));
+      if (versionParameters != null) {
+        for (String version : versionParameters) {
+          if (version == null || version.length() == 0 ||
+              !knownVersions.contains(version)) {
+            return null;
+          }
+        }
       } else {
-        fingerprintParameter[0] = fingerprintParameter[0].toLowerCase();
-        recognizedGraphParameters.put("fingerprint",
-            fingerprintParameter);
+        versionParameters = new String[] { "v4" };
       }
+      recognizedGraphParameters.put("version", versionParameters);
     }
 
     /* We now have a map with all required graph parameters. Return it. */
diff --git a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java
index 6ed77f2..1fb7183 100644
--- a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java
+++ b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java
@@ -105,6 +105,14 @@ public class RObjectGenerator implements ServletContextListener {
     this.availableGraphs.put("connbidirect", "start,end,filename");
     this.availableGraphs.put("fast-exits", "start,end,filename");
     this.availableGraphs.put("almost-fast-exits", "start,end,filename");
+    this.availableGraphs.put("userstats-relay-country",
+        "start,end,country,filename");
+    this.availableGraphs.put("userstats-bridge-country",
+        "start,end,country,filename");
+    this.availableGraphs.put("userstats-bridge-transport",
+        "start,end,transport,filename");
+    this.availableGraphs.put("userstats-bridge-version",
+        "start,end,version,filename");
     this.availableGraphFileTypes = new HashSet<String>(Arrays.asList(
         "png,pdf,svg".split(",")));
     GraphParameterChecker.getInstance().setAvailableGraphs(
diff --git a/web/WEB-INF/users.jsp b/web/WEB-INF/users.jsp
index d75bf7a..d7d17d5 100644
--- a/web/WEB-INF/users.jsp
+++ b/web/WEB-INF/users.jsp
@@ -207,6 +207,173 @@ Tor users (direct and bridge) per month by country.</p>
 <p><a href="csv/monthly-users-average.csv">CSV</a> file containing average
 daily Tor users (direct and bridge) per month by country.</p>
 <br>
+
+<a name="userstats"></a>
+<h3><a href="#userstats" class="anchor">New approach to estimating daily
+Tor users (BETA)</a></h3>
+<br>
+<p>As of April 2013, we are experimenting with a new approach to estimating
+daily Tor users.
+The new approach works very similarly to the existing approach for estimating
+directly connecting users, but can also be applied to bridge users.
+This new approach can break down user numbers by country, pluggable
+transport, and IP version.
+See the tech report on
+<a href="https://research.torproject.org/techreports/counting-daily-bridge-users-2012-10-24.pdf">Counting daily bridge users</a>
+and the
+<a href="https://gitweb.torproject.org/metrics-tasks.git/tree/HEAD:/task-8462">source code</a>
+for details.</p>
+
+<p><font color="red">Note that this approach should be considered
+experimental and absolute numbers should be taken with care!</font></p>
+
+<a name="userstats-relay-country"></a>
+<p><b>Direct users by country (BETA):</b></p>
+
+<font color="red">In contrast to the graphs above, this graph is based on
+requests to directory mirrors <i>and</i> directory authorities.
+That is why the numbers here are higher.
+It's yet to be decided which approach is more correct.</font>
+
+<img src="userstats-relay-country.png${userstats_relay_country_url}"
+     width="576" height="360" alt="Direct users by country graph (BETA)">
+<form action="users.html#userstats-relay-country">
+  <div class="formrow">
+    <input type="hidden" name="graph" value="userstats-relay-country">
+    <p>
+    <label>Start date (yyyy-mm-dd):</label>
+      <input type="text" name="start" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_relay_country_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_relay_country_start[0]}</c:otherwise></c:choose>">
+    <label>End date (yyyy-mm-dd):</label>
+      <input type="text" name="end" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_relay_country_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_relay_country_end[0]}</c:otherwise></c:choose>">
+    </p><p>
+      Source: <select name="country">
+        <option value="all"<c:if test="${userstats_relay_country_country[0] eq 'all'}"> selected</c:if>>All users</option>
+        <c:forEach var="country" items="${countries}" >
+          <option value="${country[0]}"<c:if test="${userstats_relay_country_country[0] eq country[0]}"> selected</c:if>>${country[1]}</option>
+        </c:forEach>
+      </select>
+    </p><p>
+    <input class="submit" type="submit" value="Update graph">
+    </p>
+  </div>
+</form>
+<p>Download graph as
+<a href="userstats-relay-country.pdf${userstats_relay_country_url}">PDF</a> or
+<a href="userstats-relay-country.svg${userstats_relay_country_url}">SVG</a>.</p>
+<hr>
+
+<a name="userstats-bridge-country"></a>
+<p><b>Bridge users by country (BETA):</b></p>
+
+<font color="red">In contrast to the bridge-user graph above, this graph
+uses directory requests to estimate user numbers, not unique IP address sets.
+It's yet to be decided which approach is more correct.</font>
+
+<img src="userstats-bridge-country.png${userstats_bridge_country_url}"
+     width="576" height="360" alt="Bridge users by country graph (BETA)">
+<form action="users.html#userstats-bridge-country">
+  <div class="formrow">
+    <input type="hidden" name="graph" value="userstats-bridge-country">
+    <p>
+    <label>Start date (yyyy-mm-dd):</label>
+      <input type="text" name="start" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_bridge_country_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_bridge_country_start[0]}</c:otherwise></c:choose>">
+    <label>End date (yyyy-mm-dd):</label>
+      <input type="text" name="end" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_bridge_country_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_bridge_country_end[0]}</c:otherwise></c:choose>">
+    </p><p>
+      Source: <select name="country">
+        <option value="all"<c:if test="${userstats_bridge_country_country[0] eq 'all'}"> selected</c:if>>All users</option>
+        <c:forEach var="country" items="${countries}" >
+          <option value="${country[0]}"<c:if test="${userstats_bridge_country_country[0] eq country[0]}"> selected</c:if>>${country[1]}</option>
+        </c:forEach>
+      </select>
+    </p><p>
+    <input class="submit" type="submit" value="Update graph">
+    </p>
+  </div>
+</form>
+<p>Download graph as
+<a href="userstats-bridge-country.pdf${userstats_bridge_country_url}">PDF</a> or
+<a href="userstats-bridge-country.svg${userstats_bridge_country_url}">SVG</a>.</p>
+<hr>
+
+<a name="userstats-bridge-transport"></a>
+<p><b>Bridge users by transport (BETA):</b></p>
+
+<font color="red">Almost none of the currently running bridges report the
+transport name of connecting users, which is why non-OR transport usage is
+so low.
+By default, we consider all users of a bridge to be OR transport users, unless told
+otherwise.
+Non-OR transport numbers will become more accurate over time.</font>
+
+<img src="userstats-bridge-transport.png${userstats_bridge_transport_url}"
+     width="576" height="360" alt="Bridge users by transport graph (BETA)">
+<form action="users.html#userstats-bridge-transport">
+  <div class="formrow">
+    <input type="hidden" name="graph" value="userstats-bridge-transport">
+    <p>
+    <label>Start date (yyyy-mm-dd):</label>
+      <input type="text" name="start" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_bridge_transport_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_bridge_transport_start[0]}</c:otherwise></c:choose>">
+    <label>End date (yyyy-mm-dd):</label>
+      <input type="text" name="end" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_bridge_transport_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_bridge_transport_end[0]}</c:otherwise></c:choose>">
+    </p><p>
+      Source: <select name="transport">
+        <option value="<OR>"<c:if test="${userstats_bridge_transport_transport[0] eq '<OR>'}"> selected</c:if>>Default OR protocol</option>
+        <option value="obfs2"<c:if test="${userstats_bridge_transport_transport[0] eq 'obfs2'}"> selected</c:if>>obfs2</option>
+        <option value="obfs3"<c:if test="${userstats_bridge_transport_transport[0] eq 'obfs3'}"> selected</c:if>>obfs3</option>
+        <option value="websocket"<c:if test="${userstats_bridge_transport_transport[0] eq 'websocket'}"> selected</c:if>>Flash proxy/websocket</option>
+        <option value="<??>"<c:if test="${userstats_bridge_transport_transport[0] eq '<??>'}"> selected</c:if>>Unknown transport</option>
+      </select>
+    </p><p>
+    <input class="submit" type="submit" value="Update graph">
+    </p>
+  </div>
+</form>
+<p>Download graph as
+<a href="userstats-bridge-transport.pdf${userstats_bridge_transport_url}">PDF</a> or
+<a href="userstats-bridge-transport.svg${userstats_bridge_transport_url}">SVG</a>.</p>
+<hr>
+
+<a name="userstats-bridge-version"></a>
+<p><b>Bridge users by IP version (BETA):</b></p>
+
+<font color="red">Not all of the currently running bridges report the
+IP version of connecting users.
+By default, we consider all users of a bridge to be IPv4 users, unless told
+otherwise.
+IPv6 numbers will become more accurate over time.</font>
+
+<img src="userstats-bridge-version.png${userstats_bridge_version_url}"
+     width="576" height="360" alt="Bridge users by IP version graph (BETA)">
+<form action="users.html#userstats-bridge-version">
+  <div class="formrow">
+    <input type="hidden" name="graph" value="userstats-bridge-version">
+    <p>
+    <label>Start date (yyyy-mm-dd):</label>
+      <input type="text" name="start" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_bridge_version_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_bridge_version_start[0]}</c:otherwise></c:choose>">
+    <label>End date (yyyy-mm-dd):</label>
+      <input type="text" name="end" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_bridge_version_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_bridge_version_end[0]}</c:otherwise></c:choose>">
+    </p><p>
+      Source: <select name="version">
+        <option value="v4"<c:if test="${userstats_bridge_version_version[0] eq 'v4'}"> selected</c:if>>IPv4</option>
+        <option value="v6"<c:if test="${userstats_bridge_version_version[0] eq 'v6'}"> selected</c:if>>IPv6</option>
+      </select>
+    </p><p>
+    <input class="submit" type="submit" value="Update graph">
+    </p>
+  </div>
+</form>
+<p>Download graph as
+<a href="userstats-bridge-version.pdf${userstats_bridge_version_url}">PDF</a> or
+<a href="userstats-bridge-version.svg${userstats_bridge_version_url}">SVG</a>.</p>
     </div>
   </div>
   <div class="bottom" id="bottom">



More information about the tor-commits mailing list