commit 41565da93ef5cc3e562f21b8695dbf251b55459a Author: Karsten Loesing karsten.loesing@gmx.net Date: Mon Sep 16 19:17:27 2013 +0200
Extend censorship detector to new user estimates. --- detector/detector.py | 30 ++++++----- detector/detector.sh | 1 + rserve/csv.R | 16 ++++++ rserve/graphs.R | 57 +++++++++++++++----- rserve/tables.R | 38 +++++++++++-- .../ernie/web/graphs/GraphsSubpagesServlet.java | 2 +- .../ernie/web/graphs/RObjectGenerator.java | 5 +- web/WEB-INF/users.jsp | 45 ++++++++++++++++ 8 files changed, 160 insertions(+), 34 deletions(-)
diff --git a/detector/detector.py b/detector/detector.py index 7f924db..2c51b5b 100644 --- a/detector/detector.py +++ b/detector/detector.py @@ -348,8 +348,8 @@ def plot_all(tss, minx, maxx, INTERV, DAYS=None, rdir="img"): summary_file.close()
"""Write a CSV report on the minimum/maximum users of each country per date.""" -def write_all(tss, minc, maxc, INTERVAL=7): - ranges_file = file("direct-users-ranges.csv", "w") +def write_all(tss, minc, maxc, RANGES_FILE, INTERVAL=7): + ranges_file = file(RANGES_FILE, "w") ranges_file.write("date,country,minusers,maxusers\n") exclude = set(["all", "??", "date"]) for c in tss.country_codes: @@ -415,23 +415,25 @@ def write_ml_report(tss, minx, maxx, INTERV, DAYS, notification_period=None):
report_file.close()
-def main(): - # Change these to customize script - CSV_FILE = "direct-users.csv" - GRAPH_DIR = "img" - # Time interval to model connection rates. - INTERV = 7 - # Consider maximum DAYS days back. - DAYS= 6 * 31 - +# INTERV is the time interval to model connection rates; +# consider maximum DAYS days back. +def detect(CSV_FILE = "userstats-detector.csv", + RANGES_FILE = "userstats-ranges.csv", GRAPH_DIR = "img", + INTERV = 7, DAYS = 6 * 31, REPORT = True): tss = torstatstore(CSV_FILE) l = tss.get_largest_locations(50) minx, maxx = make_tendencies_minmax(l, INTERV) #plot_all(tss, minx, maxx, INTERV, DAYS, rdir=GRAPH_DIR) - write_all(tss, minx, maxx, INTERV) + write_all(tss, minx, maxx, RANGES_FILE, INTERV)
- # Make our short report; only consider events of the last day - write_ml_report(tss, minx, maxx, INTERV, DAYS, 1) + if REPORT: + # Make our short report; only consider events of the last day + write_ml_report(tss, minx, maxx, INTERV, DAYS, 1) + +def main(): + detect(CSV_FILE = "direct-users.csv", + RANGES_FILE = "direct-users-ranges.csv") + detect(REPORT = False)
if __name__ == "__main__": main() diff --git a/detector/detector.sh b/detector/detector.sh index 8e2ea47..56f6886 100755 --- a/detector/detector.sh +++ b/detector/detector.sh @@ -1,5 +1,6 @@ #!/bin/bash wget -qO direct-users.csv --no-check-certificate https://metrics.torproject.org/csv/direct-users.csv +wget -qO userstats-detector.csv --no-check-certificate https://metrics.torproject.org/csv/userstats-detector.csv python detector.py cat short_censorship_report.txt | mail -E -s 'Possible censorship events' tor-censorship-events@lists.torproject.org
diff --git a/rserve/csv.R b/rserve/csv.R index 531e73f..34a3f91 100644 --- a/rserve/csv.R +++ b/rserve/csv.R @@ -328,3 +328,19 @@ export_monthly_userstats_average <- function(path) { help_export_monthly_userstats(path, mean) }
+export_userstats_detector <- function(path) { + u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/", + "task-8462/userstats.csv", sep = ""), + stringsAsFactors = FALSE) + u <- u[u$country != '' & u$transport == '' & u$version == '' & + u$node == 'relay', c("country", "date", "users")] + u <- rbind(u, data.frame(country = "zy", + aggregate(list(users = u$users), + by = list(date = u$date), sum))) + u <- data.frame(date = u$date, country = u$country, + users = floor(u$users)) + u <- cast(u, date ~ country, value = "users") + names(u)[names(u) == "zy"] <- "all" + write.csv(u, path, quote = FALSE, row.names = FALSE) +} + diff --git a/rserve/graphs.R b/rserve/graphs.R index 69f9aa9..da39327 100644 --- a/rserve/graphs.R +++ b/rserve/graphs.R @@ -690,8 +690,8 @@ plot_direct_users <- function(start, end, country, events, path) { if (length(r$maxusers) > 0) max_y <- max(max_y, max(r$maxusers, na.rm = TRUE)) plot <- plot + - geom_ribbon(data = r, aes(ymin = minusers, ymax = maxusers), - fill = "gray") + geom_ribbon(data = r, aes(ymin = max(0, minusers), + ymax = maxusers), fill = "gray") } if (length(upturns$date) > 0) plot <- plot + @@ -1022,7 +1022,8 @@ plot_bandwidth_flags <- function(start, end, path) { ggsave(filename = path, width = 8, height = 5, dpi = 72) }
-plot_userstats <- function(start, end, node, variable, value, path) { +plot_userstats <- function(start, end, node, variable, value, events, + path) { end <- min(end, as.character(Sys.Date())) u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/", "task-8462/userstats.csv", sep = ""), @@ -1052,7 +1053,7 @@ plot_userstats <- function(start, end, node, variable, value, path) { " (BETA)\n", sep = "") } else { u <- u[u$country == '' & u$transport == '' & u$version == '' & - u$node == 'bridge', ] + u$node == 'bridge', ] title <- "Bridge users (BETA)\n" } } @@ -1068,32 +1069,62 @@ plot_userstats <- function(start, end, node, variable, value, path) { formatter <- function(x, ...) { format(x, scientific = FALSE, ...) } date_breaks <- date_breaks( as.numeric(max(u$date) - min(u$date))) - ggplot(u, aes(x = date, y = users)) + + max_y <- ifelse(length(na.omit(u$users)) == 0, 0, + max(u$users, na.rm = TRUE)) + plot <- ggplot(u, aes(x = date, y = users)) + if (length(na.omit(u$users)) > 0 & events != "off" & + variable == 'country' & value != "all") { + r <- read.csv( + "/srv/metrics.torproject.org/web/detector/userstats-ranges.csv", + stringsAsFactors = FALSE) + r <- r[r$date >= start & r$date <= end & r$country == value, + c("date", "minusers", "maxusers")] + r <- cast(rbind(melt(u, id.vars = "date"), melt(r, id.vars = "date"))) + upturns <- r[r$users > r$maxusers, 1:2] + downturns <- r[r$users < r$minusers, 1:2] + if (events == "on") { + if (length(r$maxusers) > 0) + max_y <- max(max_y, max(r$maxusers, na.rm = TRUE)) + plot <- plot + + geom_ribbon(data = r, aes(ymin = max(0, minusers), + ymax = maxusers), fill = "gray") + } + if (length(upturns$date) > 0) + plot <- plot + + geom_point(data = upturns, aes(x = date, y = users), size = 5, + colour = "dodgerblue2") + if (length(downturns$date) > 0) + plot <- plot + + geom_point(data = downturns, aes(x = date, y = users), size = 5, + colour = "firebrick2") + } + plot <- plot + geom_line(size = 1) + 
scale_x_date(name = paste("\nThe Tor Project - ", "https://metrics.torproject.org/", sep = ""), format = date_breaks$format, major = date_breaks$major, minor = date_breaks$minor) + - scale_y_continuous(name = "", limits = c(0, - ifelse(length(na.omit(u$users)) == 0, 0, - max(u$users, na.rm = TRUE))), formatter = formatter) + + scale_y_continuous(name = "", limits = c(0, max_y), + formatter = formatter) opts(title = title) ggsave(filename = path, width = 8, height = 5, dpi = 72) }
-plot_userstats_relay_country <- function(start, end, country, path) { - plot_userstats(start, end, 'relay', 'country', country, path) +plot_userstats_relay_country <- function(start, end, country, events, + path) { + plot_userstats(start, end, 'relay', 'country', country, events, path) }
plot_userstats_bridge_country <- function(start, end, country, path) { - plot_userstats(start, end, 'bridge', 'country', country, path) + plot_userstats(start, end, 'bridge', 'country', country, 'off', path) }
plot_userstats_bridge_transport <- function(start, end, transport, path) { - plot_userstats(start, end, 'bridge', 'transport', transport, path) + plot_userstats(start, end, 'bridge', 'transport', transport, 'off', + path) }
plot_userstats_bridge_version <- function(start, end, version, path) { - plot_userstats(start, end, 'bridge', 'version', version, path) + plot_userstats(start, end, 'bridge', 'version', version, 'off', path) }
diff --git a/rserve/tables.R b/rserve/tables.R index e0dc1e4..59593d6 100644 --- a/rserve/tables.R +++ b/rserve/tables.R @@ -42,9 +42,6 @@ write_censorship_events <- function(start, end, path) { u <- data.frame(date = u$date, country = u$country, users = u$r * (u$bwp * u$brn / u$bwn - u$brp) / (u$bwr * u$brn / u$bwn - u$brr) / 10) - dates <- seq(from = as.Date(start, "%Y-%m-%d"), - to = as.Date(end, "%Y-%m-%d"), by="1 day") - missing <- setdiff(dates, u$date) r <- read.csv( "/srv/metrics.torproject.org/web/detector/direct-users-ranges.csv", stringsAsFactors = FALSE) @@ -60,10 +57,10 @@ write_censorship_events <- function(start, end, path) { by = list(country = r$country), sum) r <- r[!(r$country %in% c("zy", "??", "a1", "a2", "o1", "ap", "eu")), ] r <- r[order(r$downturn, r$upturn, decreasing = TRUE), ] - r <- r[1:10, ] + r <- r[1:10, ] r <- data.frame(cc = r$country, country = sub('the ', '', countrynames(as.character(r$country))), - downturns = r$downturn, + downturns = r$downturn, upturns = r$upturn) write.csv(r, path, quote = FALSE, row.names = FALSE) } @@ -122,3 +119,34 @@ write_userstats_bridge <- function(start, end, path) { write_userstats(start, end, 'bridge', path) }
+write_userstats_censorship_events <- function(start, end, path) { + end <- min(end, as.character(Sys.Date())) + u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/", + "task-8462/userstats.csv", sep = ""), + stringsAsFactors = FALSE) + u <- u[u$date >= start & u$date <= end & u$country != '' & + u$transport == '' & u$version == '' & u$node == 'relay', + c("date", "country", "users")] + r <- read.csv( + "/srv/metrics.torproject.org/web/detector/userstats-ranges.csv", + stringsAsFactors = FALSE) + r <- r[r$date >= start & r$date <= end, + c("date", "country", "minusers", "maxusers")] + r <- cast(rbind(melt(u, id.vars = c("date", "country")), + melt(r, id.vars = c("date", "country")))) + r <- na.omit(r[r$users < r$minusers | r$users > r$maxusers, ]) + r <- data.frame(date = r$date, country = r$country, + upturn = ifelse(r$users > r$maxusers, 1, 0), + downturn = ifelse(r$users < r$minusers, 1, 0)) + r <- aggregate(r[, c("upturn", "downturn")], + by = list(country = r$country), sum) + r <- r[!(r$country %in% c("zy", "??", "a1", "a2", "o1", "ap", "eu")), ] + r <- r[order(r$downturn, r$upturn, decreasing = TRUE), ] + r <- r[1:10, ] + r <- data.frame(cc = r$country, + country = sub('the ', '', countrynames(as.character(r$country))), + downturns = r$downturn, + upturns = r$upturn) + write.csv(r, path, quote = FALSE, row.names = FALSE) +} + diff --git a/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java b/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java index 3ac99bb..c79f1e3 100644 --- a/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java +++ b/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java @@ -52,7 +52,7 @@ public class GraphsSubpagesServlet extends HttpServlet { this.availableGraphsSubpageTables.put("users.html", new HashSet<String>(Arrays.asList(( "direct-users,censorship-events,bridge-users,userstats-relay," - + "userstats-bridge").split(",")))); + + 
"userstats-censorship-events,userstats-bridge").split(","))));
this.knownCountries = Countries.getInstance().getCountryList(); } diff --git a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java index 84d61c6..2fa0cc6 100644 --- a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java +++ b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java @@ -76,6 +76,7 @@ public class RObjectGenerator implements ServletContextListener { this.availableCsvFiles.add("torperf"); this.availableCsvFiles.add("torperf-failures"); this.availableCsvFiles.add("userstats"); + this.availableCsvFiles.add("userstats-detector"); this.availableCsvFiles.add("versions");
this.availableTables = new HashMap<String, String>(); @@ -84,6 +85,8 @@ public class RObjectGenerator implements ServletContextListener { this.availableTables.put("bridge-users", "start,end,filename"); this.availableTables.put("userstats-relay", "start,end,filename"); this.availableTables.put("userstats-bridge", "start,end,filename"); + this.availableTables.put("userstats-censorship-events", + "start,end,filename"); TableParameterChecker.getInstance().setAvailableTables( availableTables);
@@ -111,7 +114,7 @@ public class RObjectGenerator implements ServletContextListener { this.availableGraphs.put("fast-exits", "start,end,filename"); this.availableGraphs.put("almost-fast-exits", "start,end,filename"); this.availableGraphs.put("userstats-relay-country", - "start,end,country,filename"); + "start,end,country,events,filename"); this.availableGraphs.put("userstats-bridge-country", "start,end,country,filename"); this.availableGraphs.put("userstats-bridge-transport", diff --git a/web/WEB-INF/users.jsp b/web/WEB-INF/users.jsp index 2a7bede..456aaab 100644 --- a/web/WEB-INF/users.jsp +++ b/web/WEB-INF/users.jsp @@ -208,6 +208,9 @@ Tor users (direct and bridge) per month by country.</p> daily Tor users (direct and bridge) per month by country.</p> <br>
+<hr> +<hr> + <a name="userstats"></a> <h3><a href="#userstats" class="anchor">New approach to estimating daily Tor users (BETA)</a></h3> @@ -257,6 +260,14 @@ It's yet to be decided which approach is more correct.</font> </c:forEach> </select> </p><p> + Show possible censorship events if available (<a + href="http://research.torproject.org/techreports/detector-2011-09-09.pdf">BETA</a>) + <select name="events"> + <option value="off">Off</option> + <option value="on"<c:if test="${userstats_relay_country_events[0] eq 'on'}"> selected</c:if>>On: both points and expected range</option> + <option value="points"<c:if test="${userstats_relay_country_events[0] eq 'points'}"> selected</c:if>>On: points only, no expected range</option> + </select> + </p><p> <input class="submit" type="submit" value="Update graph"> </p> </div> @@ -296,6 +307,40 @@ It's yet to be decided which approach is more correct.</font> </c:forEach> </table> <hr> +<a name="userstats-censorship-events"></a> +<p><b>Top-10 countries by possible censorship events (<a + href="http://research.torproject.org/techreports/detector-2011-09-09.pdf">BETA</a>):</b></p> +<form action="users.html#userstats-censorship-events"> + <div class="formrow"> + <input type="hidden" name="table" value="userstats-censorship-events"> + <p> + <label>Start date (yyyy-mm-dd):</label> + <input type="text" name="start" size="10" + value="<c:choose><c:when test="${fn:length(userstats_censorship_events_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_censorship_events_start[0]}</c:otherwise></c:choose>"> + <label>End date (yyyy-mm-dd):</label> + <input type="text" name="end" size="10" + value="<c:choose><c:when test="${fn:length(userstats_censorship_events_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_censorship_events_end[0]}</c:otherwise></c:choose>"> + </p><p> + <input class="submit" type="submit" value="Update table"> + </p> + </div> +</form> +<br> +<table> + <tr> + <th>Country</th> + 
<th>Downturns</th> + <th>Upturns</th> + </tr> + <c:forEach var="row" items="${userstats_censorship_events_tabledata}"> + <tr> + <td><a href="users.html?graph=direct-users&country=${row['cc']}&events=on#direct-users">${row['country']}</a> </td> + <td>${row['downturns']}</td> + <td>${row['upturns']}</td> + </tr> + </c:forEach> +</table> +<hr>
<a name="userstats-bridge-country"></a> <p><b>Bridge users by country (BETA):</b></p>