commit 14fea500ec0747887fa67ebbb37804d1c76b5041 Author: Karsten Loesing karsten.loesing@gmx.net Date: Tue Aug 28 19:12:05 2012 +0200
Add advertised bw/history graph by Exit/Guard flag.
Implements #6671. --- db/tordir.sql | 45 ++++++++++++++ etc/web.xml | 4 + rserve/csv.R | 26 ++++++++ rserve/graphs.R | 63 ++++++++++++++++++++ src/org/torproject/ernie/web/GraphDataServlet.java | 2 + src/org/torproject/ernie/web/RObjectGenerator.java | 2 + web/WEB-INF/network.jsp | 35 +++++++++++ 7 files changed, 177 insertions(+), 0 deletions(-)
diff --git a/db/tordir.sql b/db/tordir.sql index 93820bc..8aa039c 100644 --- a/db/tordir.sql +++ b/db/tordir.sql @@ -219,6 +219,15 @@ CREATE TABLE total_bwhist ( CONSTRAINT total_bwhist_pkey PRIMARY KEY(date) );
+-- TABLE bandwidth_flags +CREATE TABLE bandwidth_flags ( + date DATE NOT NULL, + isexit BOOLEAN NOT NULL, + isguard BOOLEAN NOT NULL, + bwadvertised BIGINT NOT NULL, + CONSTRAINT bandwidth_flags_pkey PRIMARY KEY(date, isexit, isguard) +); + -- TABLE bwhist_flags CREATE TABLE bwhist_flags ( date DATE NOT NULL, @@ -647,6 +656,40 @@ CREATE OR REPLACE FUNCTION refresh_total_bwhist() RETURNS INTEGER AS $$ END; $$ LANGUAGE plpgsql;
+CREATE OR REPLACE FUNCTION refresh_bandwidth_flags() RETURNS INTEGER AS $$ + DECLARE + min_date TIMESTAMP WITHOUT TIME ZONE; + max_date TIMESTAMP WITHOUT TIME ZONE; + BEGIN + + min_date := (SELECT MIN(date) FROM updates); + max_date := (SELECT MAX(date) + 1 FROM updates); + + DELETE FROM bandwidth_flags WHERE date IN (SELECT date FROM updates); + EXECUTE ' + INSERT INTO bandwidth_flags (date, isexit, isguard, bwadvertised) + SELECT DATE(validafter) AS date, + BOOL_OR(isexit) AS isexit, + BOOL_OR(isguard) AS isguard, + (SUM(LEAST(bandwidthavg, bandwidthobserved)) + / relay_statuses_per_day.count)::BIGINT AS bwadvertised + FROM descriptor RIGHT JOIN statusentry + ON descriptor.descriptor = statusentry.descriptor + JOIN relay_statuses_per_day + ON DATE(validafter) = relay_statuses_per_day.date + WHERE isrunning = TRUE + AND validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' + AND DATE(validafter) IN (SELECT date FROM updates) + AND relay_statuses_per_day.date >= ''' || min_date || ''' + AND relay_statuses_per_day.date < ''' || max_date || ''' + AND DATE(relay_statuses_per_day.date) IN + (SELECT date FROM updates) + GROUP BY DATE(validafter), isexit, isguard, relay_statuses_per_day.count'; + RETURN 1; + END; +$$ LANGUAGE plpgsql; + CREATE OR REPLACE FUNCTION refresh_bwhist_flags() RETURNS INTEGER AS $$ DECLARE min_date TIMESTAMP WITHOUT TIME ZONE; @@ -923,6 +966,8 @@ CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$ PERFORM refresh_total_bandwidth(); RAISE NOTICE '% Refreshing relay bandwidth history.', timeofday(); PERFORM refresh_total_bwhist(); + RAISE NOTICE '% Refreshing total relay bandwidth by flags.', timeofday(); + PERFORM refresh_bandwidth_flags(); RAISE NOTICE '% Refreshing bandwidth history by flags.', timeofday(); PERFORM refresh_bwhist_flags(); RAISE NOTICE '% Refreshing user statistics.', timeofday(); diff --git a/etc/web.xml b/etc/web.xml index 0931a71..ccf6dd2 100644 --- a/etc/web.xml +++ b/etc/web.xml @@ -193,6 +193,10 @@ </servlet-mapping> <servlet-mapping> <servlet-name>GraphImage</servlet-name> + <url-pattern>/bandwidth-flags.png</url-pattern> + </servlet-mapping> + <servlet-mapping> + <servlet-name>GraphImage</servlet-name> <url-pattern>/dirbytes.png</url-pattern> </servlet-mapping> <servlet-mapping> diff --git a/rserve/csv.R b/rserve/csv.R index cc50b8a..d7917e0 100644 --- a/rserve/csv.R +++ b/rserve/csv.R @@ -279,3 +279,29 @@ export_dirreq_stats <- function(path) { quote = FALSE, row.names = FALSE) }
+export_bandwidth_flags <- function(path) { + drv <- dbDriver("PostgreSQL") + con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) + q <- paste("SELECT date, isexit, isguard, bwadvertised AS value", + "FROM bandwidth_flags WHERE date < current_date - 3") + rs <- dbSendQuery(con, q) + bw_desc <- fetch(rs, n = -1) + q <- paste("SELECT date, isexit, isguard,", + "(read + written) / (2 * 86400) AS value", + "FROM bwhist_flags WHERE date < current_date - 3") + rs <- dbSendQuery(con, q) + bw_hist <- fetch(rs, n = -1) + dbDisconnect(con) + dbUnloadDriver(drv) + bandwidth <- rbind(data.frame(bw_desc, type = "advbw"), + data.frame(bw_hist, type = "bwhist")) + bandwidth <- rbind( + data.frame(bandwidth[bandwidth$isguard == TRUE, ], flag = "guard"), + data.frame(bandwidth[bandwidth$isexit == TRUE, ], flag = "exit")) + bandwidth <- aggregate(list(value = bandwidth$value), + by = list(date = bandwidth$date, type = bandwidth$type, + flag = bandwidth$flag), FUN = sum) + write.csv(format(bandwidth, trim = TRUE, scientific = FALSE), path, + quote = FALSE, row.names = FALSE) +} + diff --git a/rserve/graphs.R b/rserve/graphs.R index 0434379..4799726 100644 --- a/rserve/graphs.R +++ b/rserve/graphs.R @@ -986,3 +986,66 @@ plot_almost_fast_exits <- function(start, end, path, dpi) { ggsave(filename = path, width = 8, height = 6, dpi = as.numeric(dpi)) }
+plot_bandwidth_flags <- function(start, end, path, dpi) { + drv <- dbDriver("PostgreSQL") + con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) + q <- paste("SELECT date, isexit, isguard, bwadvertised AS value ", + "FROM bandwidth_flags WHERE date >= '", start, "' AND date <= '", + end, "' AND date < current_date - 3", sep = "") + rs <- dbSendQuery(con, q) + bw_desc <- fetch(rs, n = -1) + q <- paste("SELECT date, isexit, isguard, ", + "(read + written) / (2 * 86400) ", + "AS value FROM bwhist_flags WHERE date >= '", start, + "' AND date <= '", end, "' AND date < current_date - 3", sep = "") + rs <- dbSendQuery(con, q) + bw_hist <- fetch(rs, n = -1) + dbDisconnect(con) + dbUnloadDriver(drv) + bandwidth <- rbind(data.frame(bw_desc, type = "advertised bandwidth"), + data.frame(bw_hist, type = "bandwidth history")) + bandwidth <- rbind( + data.frame(bandwidth[bandwidth$isguard == TRUE, ], flag = "Guard"), + data.frame(bandwidth[bandwidth$isexit == TRUE, ], flag = "Exit")) + bandwidth <- aggregate(list(value = bandwidth$value), + by = list(date = bandwidth$date, type = bandwidth$type, + flag = bandwidth$flag), FUN = sum) + date_breaks <- date_breaks( + as.numeric(max(as.Date(bandwidth$date, "%Y-%m-%d")) - + min(as.Date(bandwidth$date, "%Y-%m-%d")))) + dates <- seq(from = as.Date(start, "%Y-%m-%d"), + to = as.Date(end, "%Y-%m-%d"), by = "1 day") + missing <- setdiff(dates, as.Date(bandwidth$date, + origin = "1970-01-01")) + if (length(missing) > 0) { + bandwidth <- rbind(bandwidth, + data.frame(date = as.Date(missing, origin = "1970-01-01"), + type = "advertised bandwidth", flag = "Exit", value = NA), + data.frame(date = as.Date(missing, origin = "1970-01-01"), + type = "bandwidth history", flag = "Exit", value = NA), + data.frame(date = as.Date(missing, origin = "1970-01-01"), + type = "advertised bandwidth", flag = "Guard", value = NA), + data.frame(date = as.Date(missing, origin = "1970-01-01"), + type = "bandwidth history", flag = "Guard", value = NA)) + } + bandwidth <- data.frame(date = bandwidth$date, + variable = as.factor(paste(bandwidth$flag, ", ", bandwidth$type, + sep = "")), value = bandwidth$value) + bandwidth$variable <- factor(bandwidth$variable, + levels = levels(bandwidth$variable)[c(3, 4, 1, 2)]) + ggplot(bandwidth, aes(x = as.Date(date, "%Y-%m-%d"), y = value / 2^20, + colour = variable)) + + geom_line(size = 1) + + scale_x_date(name = paste("\nThe Tor Project - ", + "https://metrics.torproject.org/", sep = ""), + format = date_breaks$format, major = date_breaks$major, + minor = date_breaks$minor) + + scale_y_continuous(name="Bandwidth (MiB/s)", + limits = c(0, max(bandwidth$value, na.rm = TRUE) / 2^20)) + + scale_colour_manual(name = "", + values = c("#E69F00", "#D6C827", "#009E73", "#00C34F")) + + opts(title = paste("Advertised bandwidth and bandwidth history by", + "relay flags"), legend.position = "top") + ggsave(filename = path, width = 8, height = 5, dpi = as.numeric(dpi)) +} + diff --git a/src/org/torproject/ernie/web/GraphDataServlet.java b/src/org/torproject/ernie/web/GraphDataServlet.java index 11800d7..b574e1c 100644 --- a/src/org/torproject/ernie/web/GraphDataServlet.java +++ b/src/org/torproject/ernie/web/GraphDataServlet.java @@ -64,6 +64,8 @@ public class GraphDataServlet extends HttpServlet { this.availableGraphDataFiles.put("relay-dir-bandwidth", "dirbytes"); this.availableGraphDataFiles.put("relay-bandwidth-history-by-flags", "bwhist-flags"); + this.availableGraphDataFiles.put("relay-bandwidth-by-flags", + "bandwidth-flags"); this.availableGraphDataFiles.put("direct-users-by-country", "direct-users"); this.availableGraphDataFiles.put("bridge-users-by-country", diff --git a/src/org/torproject/ernie/web/RObjectGenerator.java b/src/org/torproject/ernie/web/RObjectGenerator.java index d8517c1..e4e08bc 100644 --- a/src/org/torproject/ernie/web/RObjectGenerator.java +++ b/src/org/torproject/ernie/web/RObjectGenerator.java @@ -54,6 +54,7 @@ public class RObjectGenerator implements ServletContextListener { /* Initialize map of available CSV files. */ this.availableCsvFiles = new TreeSet<String>(); this.availableCsvFiles.add("bandwidth"); + this.availableCsvFiles.add("bandwidth-flags"); this.availableCsvFiles.add("bridge-users"); this.availableCsvFiles.add("bwhist-flags"); this.availableCsvFiles.add("connbidirect"); @@ -88,6 +89,7 @@ public class RObjectGenerator implements ServletContextListener { this.availableGraphs.put("versions", "start,end,filename,dpi"); this.availableGraphs.put("platforms", "start,end,filename,dpi"); this.availableGraphs.put("bandwidth", "start,end,filename,dpi"); + this.availableGraphs.put("bandwidth-flags", "start,end,filename,dpi"); this.availableGraphs.put("bwhist-flags", "start,end,filename,dpi"); this.availableGraphs.put("dirbytes", "start,end,filename,dpi"); this.availableGraphs.put("direct-users", diff --git a/web/WEB-INF/network.jsp b/web/WEB-INF/network.jsp index 04fffef..d73f672 100644 --- a/web/WEB-INF/network.jsp +++ b/web/WEB-INF/network.jsp @@ -303,6 +303,41 @@ Exit and/or Guard flags assigned by the directory authorities.</p> <p><a href="csv/bwhist-flags.csv">CSV</a> file containing all data.</p> <br>
+<a name="bandwidth-flags"></a> +<h3><a href="#bandwidth-flags" class="anchor">Advertised bandwidth and +bandwidth history by relay flags</a></h3> +<br> +<p>The following graph shows the advertised bandwidth and bandwidth +history of all relays with the Exit and/or Guard flags assigned by the +directory authorities. +Note that these sets possibly overlap with relays having both Exit and +Guard flag.</p> +<img src="bandwidth-flags.png${bandwidth_flags_url}" + width="576" height="360" alt="Advertised bandwidth and bandwidth history by relay flags graph"> +<form action="network.html#bandwidth-flags"> + <div class="formrow"> + <input type="hidden" name="graph" value="bandwidth-flags"> + <p> + <label>Start date (yyyy-mm-dd):</label> + <input type="text" name="start" size="10" + value="<c:choose><c:when test="${fn:length(bandwidth_flags_start) == 0}">${default_start_date}</c:when><c:otherwise>${bandwidth_flags_start[0]}</c:otherwise></c:choose>"> + <label>End date (yyyy-mm-dd):</label> + <input type="text" name="end" size="10" + value="<c:choose><c:when test="${fn:length(bandwidth_flags_end) == 0}">${default_end_date}</c:when><c:otherwise>${bandwidth_flags_end[0]}</c:otherwise></c:choose>"> + </p><p> + Resolution: <select name="dpi"> + <option value="72"<c:if test="${bandwidth_flags_dpi[0] eq '72'}"> selected</c:if>>Screen - 576x360</option> + <option value="150"<c:if test="${bandwidth_flags_dpi[0] eq '150'}"> selected</c:if>>Print low - 1200x750</option> + <option value="300"<c:if test="${bandwidth_flags_dpi[0] eq '300'}"> selected</c:if>>Print high - 2400x1500</option> + </select> + </p><p> + <input class="submit" type="submit" value="Update graph"> + </p> + </div> +</form> +<p><a href="csv/bandwidth-flags.csv">CSV</a> file containing all data.</p> +<br> + <a name="dirbytes"></a> <h3><a href="#dirbytes" class="anchor">Number of bytes spent on answering directory requests</a></h3>