commit 17f37d58cd67fd3862fa00587cee9f6bedf5bc7d Author: Karsten Loesing karsten.loesing@gmx.net Date: Mon Sep 16 16:08:45 2013 +0200
Add top-10 country tables to new user estimates. --- rserve/tables.R | 30 +++++++++ .../ernie/web/graphs/GraphsSubpagesServlet.java | 5 +- .../ernie/web/graphs/RObjectGenerator.java | 2 + web/WEB-INF/users.jsp | 70 ++++++++++++++++++++ 4 files changed, 105 insertions(+), 2 deletions(-)
diff --git a/rserve/tables.R b/rserve/tables.R index eb8f676..e0dc1e4 100644 --- a/rserve/tables.R +++ b/rserve/tables.R @@ -92,3 +92,33 @@ write_bridge_users <- function(start, end, path) { write.csv(d, path, quote = FALSE, row.names = FALSE) }
+write_userstats <- function(start, end, node, path) {  # top-10 country table for one node type, written as CSV to path
+  end <- min(end, as.character(Sys.Date()))  # cap the end date at today
+  u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/",
+    "task-8462/userstats.csv", sep = ""),
+    stringsAsFactors = FALSE)
+  u <- u[u$date >= start & u$date <= end & u$country != '' &
+    u$transport == '' & u$version == '' & u$node == node,
+    c("country", "users")]
+  u <- aggregate(list(users = u$users), by = list(country = u$country),
+    mean)
+  total <- sum(u$users)  # summed before the exclusions below, so 'rel' is a share of all countries
+  u <- u[!(u$country %in% c("zy", "??", "a1", "a2", "o1", "ap", "eu")), ]  # codes kept out of the ranking
+  u <- u[order(u$users, decreasing = TRUE), ]
+  u <- head(u, 10)  # at most ten rows; u[1:10, ] would pad with NA rows when fewer remain
+  u <- data.frame(
+    cc = as.character(u$country),
+    country = sub('the ', '', countrynames(as.character(u$country))),
+    abs = round(u$users),
+    rel = round(100 * u$users / total, 2))
+  write.csv(u, path, quote = FALSE, row.names = FALSE)
+}
+
+write_userstats_relay <- function(start, end, path) {  # write_userstats for node 'relay'
+  write_userstats(start, end, 'relay', path)
+}
+
+write_userstats_bridge <- function(start, end, path) {  # write_userstats for node 'bridge'
+  write_userstats(start, end, 'bridge', path)
+}
+
diff --git a/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java b/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java index 12a098d..3ac99bb 100644 --- a/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java +++ b/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java @@ -50,8 +50,9 @@ public class GraphsSubpagesServlet extends HttpServlet { this.availableGraphsSubpageTables = new HashMap<String, Set<String>>(); this.availableGraphsSubpageTables.put("users.html", - new HashSet<String>(Arrays.asList( - "direct-users,censorship-events,bridge-users".split(",")))); + new HashSet<String>(Arrays.asList(( + "direct-users,censorship-events,bridge-users,userstats-relay," + + "userstats-bridge").split(","))));
this.knownCountries = Countries.getInstance().getCountryList(); } diff --git a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java index 69092e6..a927578 100644 --- a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java +++ b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java @@ -80,6 +80,8 @@ public class RObjectGenerator implements ServletContextListener { this.availableTables.put("direct-users", "start,end,filename"); this.availableTables.put("censorship-events", "start,end,filename"); this.availableTables.put("bridge-users", "start,end,filename"); + this.availableTables.put("userstats-relay", "start,end,filename"); + this.availableTables.put("userstats-bridge", "start,end,filename"); TableParameterChecker.getInstance().setAvailableTables( availableTables);
diff --git a/web/WEB-INF/users.jsp b/web/WEB-INF/users.jsp index 93b89b8..10b630a 100644 --- a/web/WEB-INF/users.jsp +++ b/web/WEB-INF/users.jsp @@ -230,10 +230,12 @@ experimental and absolute numbers should be taken with care!</font></p> <a name="userstats-relay-country"></a> <p><b>Direct users by country (BETA):</b></p>
+<p> <font color="red">In contrast to the graphs above, this graph is based on requests to directory mirrors <i>and</i> directory authorities. That is why the numbers here are higher. It's yet to be decided which approach is more correct.</font> +</p>
<img src="userstats-relay-country.png${userstats_relay_country_url}" width="576" height="360" alt="Direct users by country graph (BETA)"> @@ -263,13 +265,46 @@ It's yet to be decided which approach is more correct.</font> <a href="userstats-relay-country.pdf${userstats_relay_country_url}">PDF</a> or <a href="userstats-relay-country.svg${userstats_relay_country_url}">SVG</a>.</p> <hr> +<a name="userstats-relay-table"></a> +<p><b>Top-10 countries by directly connecting users (BETA):</b></p> +<form action="users.html#userstats-relay-table"> + <div class="formrow"> + <input type="hidden" name="table" value="userstats-relay"> + <p> + <label>Start date (yyyy-mm-dd):</label> + <input type="text" name="start" size="10" + value="<c:choose><c:when test="${fn:length(userstats_relay_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_relay_start[0]}</c:otherwise></c:choose>"> + <label>End date (yyyy-mm-dd):</label> + <input type="text" name="end" size="10" + value="<c:choose><c:when test="${fn:length(userstats_relay_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_relay_end[0]}</c:otherwise></c:choose>"> + </p><p> + <input class="submit" type="submit" value="Update table"> + </p> + </div> +</form> +<br> +<table> + <tr> + <th>Country</th> + <th>Mean daily users</th> + </tr> + <c:forEach var="row" items="${userstats_relay_tabledata}"> + <tr> + <td><a href="users.html?graph=userstats-relay-country&country=${row['cc']}#userstats-relay">${row['country']}</a> </td> + <td>${row['abs']} (<fmt:formatNumber type="number" minFractionDigits="2" value="${row['rel']}" /> %)</td> + </tr> + </c:forEach> +</table> +<hr>
<a name="userstats-bridge-country"></a> <p><b>Bridge users by country (BETA):</b></p>
+<p> <font color="red">In contrast to the bridge-user graph above, this graph uses directory requests to estimate user numbers, not unique IP address sets. It's yet to be decided which approach is more correct.</font> +</p>
<img src="userstats-bridge-country.png${userstats_bridge_country_url}" width="576" height="360" alt="Bridge users by country graph (BETA)"> @@ -299,16 +334,49 @@ It's yet to be decided which approach is more correct.</font> <a href="userstats-bridge-country.pdf${userstats_bridge_country_url}">PDF</a> or <a href="userstats-bridge-country.svg${userstats_bridge_country_url}">SVG</a>.</p> <hr> +<a name="userstats-bridge-table"></a> +<p><b>Top-10 countries by bridge users (BETA):</b></p> +<form action="users.html#userstats-bridge-table"> + <div class="formrow"> + <input type="hidden" name="table" value="userstats-bridge"> + <p> + <label>Start date (yyyy-mm-dd):</label> + <input type="text" name="start" size="10" + value="<c:choose><c:when test="${fn:length(userstats_bridge_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_bridge_start[0]}</c:otherwise></c:choose>"> + <label>End date (yyyy-mm-dd):</label> + <input type="text" name="end" size="10" + value="<c:choose><c:when test="${fn:length(userstats_bridge_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_bridge_end[0]}</c:otherwise></c:choose>"> + </p><p> + <input class="submit" type="submit" value="Update table"> + </p> + </div> +</form> +<br> +<table> + <tr> + <th>Country</th> + <th>Mean daily users</th> + </tr> + <c:forEach var="row" items="${userstats_bridge_tabledata}"> + <tr> + <td><a href="users.html?graph=userstats-bridge-country&country=${row['cc']}#userstats-bridge">${row['country']}</a> </td> + <td>${row['abs']} (<fmt:formatNumber type="number" minFractionDigits="2" value="${row['rel']}" /> %)</td> + </tr> + </c:forEach> +</table> +<hr>
<a name="userstats-bridge-transport"></a> <p><b>Bridge users by transport (BETA):</b></p>
+<p> <font color="red">Almost none of the currently running bridges report the transport name of connecting users, which is why non-OR transport usage is so low. By default, we consider all users of a bridge OR transport users, unless told otherwise. Non-OR transport numbers will become more accurate over time.</font> +</p>
<img src="userstats-bridge-transport.png${userstats_bridge_transport_url}" width="576" height="360" alt="Bridge users by transport graph (BETA)"> @@ -343,11 +411,13 @@ Non-OR transport numbers will become more accurate over time.</font> <a name="userstats-bridge-version"></a> <p><b>Bridge users by IP version (BETA):</b></p>
+<p> <font color="red">Not all of the currently running bridges report the IP version of connecting users. By default, we consider all users of a bridge IPv4 users, unless told otherwise. IPv6 numbers will become more accurate over time.</font> +</p>
<img src="userstats-bridge-version.png${userstats_bridge_version_url}" width="576" height="360" alt="Bridge users by IP version graph (BETA)">