[tor-commits] [metrics-web/master] Add top-10 country tables to new user estimates.

karsten at torproject.org karsten at torproject.org
Mon Sep 16 18:00:12 UTC 2013


commit 17f37d58cd67fd3862fa00587cee9f6bedf5bc7d
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Sep 16 16:08:45 2013 +0200

    Add top-10 country tables to new user estimates.
---
 rserve/tables.R                                    |   30 +++++++++
 .../ernie/web/graphs/GraphsSubpagesServlet.java    |    5 +-
 .../ernie/web/graphs/RObjectGenerator.java         |    2 +
 web/WEB-INF/users.jsp                              |   70 ++++++++++++++++++++
 4 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/rserve/tables.R b/rserve/tables.R
index eb8f676..e0dc1e4 100644
--- a/rserve/tables.R
+++ b/rserve/tables.R
@@ -92,3 +92,33 @@ write_bridge_users <- function(start, end, path) {
   write.csv(d, path, quote = FALSE, row.names = FALSE)
 }
 
+# Write up to 10 countries with the highest mean daily users to CSV at path.
+write_userstats <- function(start, end, node, path) {
+  end <- min(end, as.character(Sys.Date()))
+  u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/",
+    "task-8462/userstats.csv", sep = ""), stringsAsFactors = FALSE)
+  u <- u[u$date >= start & u$date <= end & u$country != '' &
+         u$transport == '' & u$version == '' & u$node == node,
+         c("country", "users")]
+  u <- aggregate(list(users = u$users), by = list(country = u$country),
+                 mean)
+  # The total (denominator of rel) still includes the codes excluded below.
+  total <- sum(u$users)
+  u <- u[!(u$country %in% c("zy", "??", "a1", "a2", "o1", "ap", "eu")), ]
+  u <- u[order(u$users, decreasing = TRUE), ]
+  # u[1:10, ] would pad with all-NA rows when fewer than 10 countries remain.
+  u <- u[seq_len(min(10, nrow(u))), ]
+  u <- data.frame(cc = as.character(u$country),
+    country = sub('the ', '', countrynames(as.character(u$country))),
+    abs = round(u$users), rel = round(100 * u$users / total, 2))
+  write.csv(u, path, quote = FALSE, row.names = FALSE)
+}
+
+# Top-10 country table restricted to userstats rows with node 'relay'.
+write_userstats_relay <- function(start, end, path)
+  write_userstats(start = start, end = end, node = 'relay', path = path)
+
+# Top-10 country table restricted to userstats rows with node 'bridge'.
+write_userstats_bridge <- function(start, end, path)
+  write_userstats(start = start, end = end, node = 'bridge', path = path)
+
diff --git a/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java b/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java
index 12a098d..3ac99bb 100644
--- a/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java
+++ b/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java
@@ -50,8 +50,9 @@ public class GraphsSubpagesServlet extends HttpServlet {
     this.availableGraphsSubpageTables =
         new HashMap<String, Set<String>>();
     this.availableGraphsSubpageTables.put("users.html",
-        new HashSet<String>(Arrays.asList(
-        "direct-users,censorship-events,bridge-users".split(","))));
+        new HashSet<String>(Arrays.asList((
+        "direct-users,censorship-events,bridge-users,userstats-relay,"
+        + "userstats-bridge").split(","))));
 
     this.knownCountries = Countries.getInstance().getCountryList();
   }
diff --git a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java
index 69092e6..a927578 100644
--- a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java
+++ b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java
@@ -80,6 +80,8 @@ public class RObjectGenerator implements ServletContextListener {
     this.availableTables.put("direct-users", "start,end,filename");
     this.availableTables.put("censorship-events", "start,end,filename");
     this.availableTables.put("bridge-users", "start,end,filename");
+    this.availableTables.put("userstats-relay", "start,end,filename");
+    this.availableTables.put("userstats-bridge", "start,end,filename");
     TableParameterChecker.getInstance().setAvailableTables(
         availableTables);
 
diff --git a/web/WEB-INF/users.jsp b/web/WEB-INF/users.jsp
index 93b89b8..10b630a 100644
--- a/web/WEB-INF/users.jsp
+++ b/web/WEB-INF/users.jsp
@@ -230,10 +230,12 @@ experimental and absolute numbers should be taken with care!</font></p>
 <a name="userstats-relay-country"></a>
 <p><b>Direct users by country (BETA):</b></p>
 
+<p>
 <font color="red">In contrast to the graphs above, this graph is based on
 requests to directory mirrors <i>and</i> directory authorities.
 That is why the numbers here are higher.
 It's yet to be decided which approach is more correct.</font>
+</p>
 
 <img src="userstats-relay-country.png${userstats_relay_country_url}"
      width="576" height="360" alt="Direct users by country graph (BETA)">
@@ -263,13 +265,46 @@ It's yet to be decided which approach is more correct.</font>
 <a href="userstats-relay-country.pdf${userstats_relay_country_url}">PDF</a> or
 <a href="userstats-relay-country.svg${userstats_relay_country_url}">SVG</a>.</p>
 <hr>
+<a name="userstats-relay-table"></a>
+<p><b>Top-10 countries by directly connecting users (BETA):</b></p>
+<form action="users.html#userstats-relay-table">
+  <div class="formrow">
+    <input type="hidden" name="table" value="userstats-relay">
+    <p>
+    <label>Start date (yyyy-mm-dd):</label>
+      <input type="text" name="start" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_relay_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_relay_start[0]}</c:otherwise></c:choose>">
+    <label>End date (yyyy-mm-dd):</label>
+      <input type="text" name="end" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_relay_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_relay_end[0]}</c:otherwise></c:choose>">
+    </p><p>
+    <input class="submit" type="submit" value="Update table">
+    </p>
+  </div>
+</form>
+<br>
+<table>
+  <tr>
+    <th>Country</th>
+    <th>Mean daily users</th>
+  </tr>
+  <c:forEach var="row" items="${userstats_relay_tabledata}">
+    <tr>
+      <td><a href="users.html?graph=userstats-relay-country&country=${row['cc']}#userstats-relay">${row['country']}</a> </td>
+      <td>${row['abs']} (<fmt:formatNumber type="number" minFractionDigits="2" value="${row['rel']}" /> %)</td>
+    </tr>
+  </c:forEach>
+</table>
+<hr>
 
 <a name="userstats-bridge-country"></a>
 <p><b>Bridge users by country (BETA):</b></p>
 
+<p>
 <font color="red">In contrast to the bridge-user graph above, this graph
 uses directory requests to estimate user numbers, not unique IP address sets.
 It's yet to be decided which approach is more correct.</font>
+</p>
 
 <img src="userstats-bridge-country.png${userstats_bridge_country_url}"
      width="576" height="360" alt="Bridge users by country graph (BETA)">
@@ -299,16 +334,49 @@ It's yet to be decided which approach is more correct.</font>
 <a href="userstats-bridge-country.pdf${userstats_bridge_country_url}">PDF</a> or
 <a href="userstats-bridge-country.svg${userstats_bridge_country_url}">SVG</a>.</p>
 <hr>
+<a name="userstats-bridge-table"></a>
+<p><b>Top-10 countries by bridge users (BETA):</b></p>
+<form action="users.html#userstats-bridge-table">
+  <div class="formrow">
+    <input type="hidden" name="table" value="userstats-bridge">
+    <p>
+    <label>Start date (yyyy-mm-dd):</label>
+      <input type="text" name="start" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_bridge_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_bridge_start[0]}</c:otherwise></c:choose>">
+    <label>End date (yyyy-mm-dd):</label>
+      <input type="text" name="end" size="10"
+             value="<c:choose><c:when test="${fn:length(userstats_bridge_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_bridge_end[0]}</c:otherwise></c:choose>">
+    </p><p>
+    <input class="submit" type="submit" value="Update table">
+    </p>
+  </div>
+</form>
+<br>
+<table>
+  <tr>
+    <th>Country</th>
+    <th>Mean daily users</th>
+  </tr>
+  <c:forEach var="row" items="${userstats_bridge_tabledata}">
+    <tr>
+      <td><a href="users.html?graph=userstats-bridge-country&country=${row['cc']}#userstats-bridge">${row['country']}</a> </td>
+      <td>${row['abs']} (<fmt:formatNumber type="number" minFractionDigits="2" value="${row['rel']}" /> %)</td>
+    </tr>
+  </c:forEach>
+</table>
+<hr>
 
 <a name="userstats-bridge-transport"></a>
 <p><b>Bridge users by transport (BETA):</b></p>
 
+<p>
 <font color="red">Almost none of the currently running bridges report the
 transport name of connecting users, which is why non-OR transport usage is
 so low.
 By default, we consider all users of a bridge OR transport users, unless told
 otherwise.
 Non-OR transport numbers will become more accurate over time.</font>
+</p>
 
 <img src="userstats-bridge-transport.png${userstats_bridge_transport_url}"
      width="576" height="360" alt="Bridge users by transport graph (BETA)">
@@ -343,11 +411,13 @@ Non-OR transport numbers will become more accurate over time.</font>
 <a name="userstats-bridge-version"></a>
 <p><b>Bridge users by IP version (BETA):</b></p>
 
+<p>
 <font color="red">Not all of the currently running bridges report the
 IP version of connecting users.
 By default, we consider all users of a bridge IPv4 users, unless told
 otherwise.
 IPv6 numbers will become more accurate over time.</font>
+</p>
 
 <img src="userstats-bridge-version.png${userstats_bridge_version_url}"
      width="576" height="360" alt="Bridge users by IP version graph (BETA)">





More information about the tor-commits mailing list