[tor-commits] [metrics-web/master] Add advertised bw/history graph by Exit/Guard flag.

karsten at torproject.org karsten at torproject.org
Tue Aug 28 17:13:07 UTC 2012


commit 14fea500ec0747887fa67ebbb37804d1c76b5041
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Aug 28 19:12:05 2012 +0200

    Add advertised bw/history graph by Exit/Guard flag.
    
    Implements #6671.
---
 db/tordir.sql                                      |   45 ++++++++++++++
 etc/web.xml                                        |    4 +
 rserve/csv.R                                       |   26 ++++++++
 rserve/graphs.R                                    |   63 ++++++++++++++++++++
 src/org/torproject/ernie/web/GraphDataServlet.java |    2 +
 src/org/torproject/ernie/web/RObjectGenerator.java |    2 +
 web/WEB-INF/network.jsp                            |   35 +++++++++++
 7 files changed, 177 insertions(+), 0 deletions(-)

diff --git a/db/tordir.sql b/db/tordir.sql
index 93820bc..8aa039c 100644
--- a/db/tordir.sql
+++ b/db/tordir.sql
@@ -219,6 +219,15 @@ CREATE TABLE total_bwhist (
     CONSTRAINT total_bwhist_pkey PRIMARY KEY(date)
 );
 
+-- TABLE bandwidth_flags: one row per date and isexit/isguard combination
+CREATE TABLE bandwidth_flags (
+    date DATE NOT NULL,
+    isexit BOOLEAN NOT NULL,
+    isguard BOOLEAN NOT NULL,
+    bwadvertised BIGINT NOT NULL,  -- written by refresh_bandwidth_flags()
+    CONSTRAINT bandwidth_flags_pkey PRIMARY KEY(date, isexit, isguard)
+);
+
 -- TABLE bwhist_flags
 CREATE TABLE bwhist_flags (
     date DATE NOT NULL,
@@ -647,6 +656,40 @@ CREATE OR REPLACE FUNCTION refresh_total_bwhist() RETURNS INTEGER AS $$
   END;
 $$ LANGUAGE plpgsql;
 
+CREATE OR REPLACE FUNCTION refresh_bandwidth_flags() RETURNS INTEGER AS $$
+    DECLARE
+        min_date TIMESTAMP WITHOUT TIME ZONE;  -- earliest date in updates
+        max_date TIMESTAMP WITHOUT TIME ZONE;  -- latest date in updates plus 1
+    BEGIN
+    -- Rebuilds the bandwidth_flags rows for all dates listed in updates.
+    min_date := (SELECT MIN(date) FROM updates);
+    max_date := (SELECT MAX(date) + 1 FROM updates);
+  -- Delete the rows of those dates, then re-insert freshly aggregated rows.
+  DELETE FROM bandwidth_flags WHERE date IN (SELECT date FROM updates);
+  EXECUTE '
+  INSERT INTO bandwidth_flags (date, isexit, isguard, bwadvertised)
+  SELECT DATE(validafter) AS date,
+      BOOL_OR(isexit) AS isexit,
+      BOOL_OR(isguard) AS isguard,
+      (SUM(LEAST(bandwidthavg, bandwidthobserved))
+      / relay_statuses_per_day.count)::BIGINT AS bwadvertised
+    FROM descriptor RIGHT JOIN statusentry
+    ON descriptor.descriptor = statusentry.descriptor
+    JOIN relay_statuses_per_day
+    ON DATE(validafter) = relay_statuses_per_day.date
+    WHERE isrunning = TRUE
+          AND validafter >= ''' || min_date || '''
+          AND validafter < ''' || max_date || '''
+          AND DATE(validafter) IN (SELECT date FROM updates)
+          AND relay_statuses_per_day.date >= ''' || min_date || '''
+          AND relay_statuses_per_day.date < ''' || max_date || '''
+          AND DATE(relay_statuses_per_day.date) IN
+              (SELECT date FROM updates)
+    GROUP BY DATE(validafter), isexit, isguard, relay_statuses_per_day.count';
+  RETURN 1;
+  END;
+$$ LANGUAGE plpgsql;
+
 CREATE OR REPLACE FUNCTION refresh_bwhist_flags() RETURNS INTEGER AS $$
     DECLARE
         min_date TIMESTAMP WITHOUT TIME ZONE;
@@ -923,6 +966,8 @@ CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
     PERFORM refresh_total_bandwidth();
     RAISE NOTICE '% Refreshing relay bandwidth history.', timeofday();
     PERFORM refresh_total_bwhist();
+    RAISE NOTICE '% Refreshing total relay bandwidth by flags.', timeofday();
+    PERFORM refresh_bandwidth_flags();
     RAISE NOTICE '% Refreshing bandwidth history by flags.', timeofday();
     PERFORM refresh_bwhist_flags();
     RAISE NOTICE '% Refreshing user statistics.', timeofday();
diff --git a/etc/web.xml b/etc/web.xml
index 0931a71..ccf6dd2 100644
--- a/etc/web.xml
+++ b/etc/web.xml
@@ -193,6 +193,10 @@
   </servlet-mapping>
   <servlet-mapping>
     <servlet-name>GraphImage</servlet-name>
+    <url-pattern>/bandwidth-flags.png</url-pattern>
+  </servlet-mapping>
+  <servlet-mapping>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/dirbytes.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
diff --git a/rserve/csv.R b/rserve/csv.R
index cc50b8a..d7917e0 100644
--- a/rserve/csv.R
+++ b/rserve/csv.R
@@ -279,3 +279,29 @@ export_dirreq_stats <- function(path) {
       quote = FALSE, row.names = FALSE)
 }
 
+# Exports advertised bandwidth and bandwidth history, summed per date and
+# Guard/Exit flag, as CSV to path; only dates before current_date - 3 are used.
+export_bandwidth_flags <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  # Release the connection and driver even if a query below fails.
+  on.exit({ dbDisconnect(con); dbUnloadDriver(drv) })
+  bw_desc <- dbGetQuery(con, paste(
+      "SELECT date, isexit, isguard, bwadvertised AS value",
+      "FROM bandwidth_flags WHERE date < current_date - 3"))
+  bw_hist <- dbGetQuery(con, paste("SELECT date, isexit, isguard,",
+      "(read + written) / (2 * 86400) AS value",
+      "FROM bwhist_flags WHERE date < current_date - 3"))
+  bandwidth <- rbind(data.frame(bw_desc, type = "advbw"),
+      data.frame(bw_hist, type = "bwhist"))
+  # Rows with both flags set land in both subsets, so the two sums overlap.
+  bandwidth <- rbind(
+    data.frame(bandwidth[bandwidth$isguard == TRUE, ], flag = "guard"),
+    data.frame(bandwidth[bandwidth$isexit == TRUE, ], flag = "exit"))
+  bandwidth <- aggregate(list(value = bandwidth$value),
+    by = list(date = bandwidth$date, type = bandwidth$type,
+    flag = bandwidth$flag), FUN = sum)
+  write.csv(format(bandwidth, trim = TRUE, scientific = FALSE), path,
+      quote = FALSE, row.names = FALSE)
+}
+
diff --git a/rserve/graphs.R b/rserve/graphs.R
index 0434379..4799726 100644
--- a/rserve/graphs.R
+++ b/rserve/graphs.R
@@ -986,3 +986,66 @@ plot_almost_fast_exits <- function(start, end, path, dpi) {
   ggsave(filename = path, width = 8, height = 6, dpi = as.numeric(dpi))
 }
 
+# Plots advertised bandwidth and bandwidth history of relays with the Guard
+# and/or Exit flag for dates from start to end and saves the graph to path.
+plot_bandwidth_flags <- function(start, end, path, dpi) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  # Release the connection and driver even if a query below fails.
+  on.exit({ dbDisconnect(con); dbUnloadDriver(drv) })
+  # NOTE(review): start/end go into the SQL unescaped; confirm validation.
+  date_filter <- paste("WHERE date >= '", start, "' AND date <= '", end,
+      "' AND date < current_date - 3", sep = "")
+  bw_desc <- dbGetQuery(con, paste("SELECT date, isexit, isguard,",
+      "bwadvertised AS value FROM bandwidth_flags", date_filter))
+  bw_hist <- dbGetQuery(con, paste("SELECT date, isexit, isguard,",
+      "(read + written) / (2 * 86400) AS value FROM bwhist_flags",
+      date_filter))
+  bandwidth <- rbind(data.frame(bw_desc, type = "advertised bandwidth"),
+      data.frame(bw_hist, type = "bandwidth history"))
+  bandwidth <- rbind(
+    data.frame(bandwidth[bandwidth$isguard == TRUE, ], flag = "Guard"),
+    data.frame(bandwidth[bandwidth$isexit == TRUE, ], flag = "Exit"))
+  bandwidth <- aggregate(list(value = bandwidth$value),
+    by = list(date = bandwidth$date, type = bandwidth$type,
+    flag = bandwidth$flag), FUN = sum)
+  breaks <- date_breaks(
+    as.numeric(max(as.Date(bandwidth$date, "%Y-%m-%d")) -
+    min(as.Date(bandwidth$date, "%Y-%m-%d"))))
+  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
+      to = as.Date(end, "%Y-%m-%d"), by = "1 day")
+  missing_dates <- as.Date(setdiff(dates, as.Date(bandwidth$date,
+    origin = "1970-01-01")), origin = "1970-01-01")
+  if (length(missing_dates) > 0) {
+    # Add NA rows for days without data to each of the four series.
+    bandwidth <- rbind(bandwidth,
+        data.frame(date = missing_dates,
+        type = "advertised bandwidth", flag = "Exit", value = NA),
+        data.frame(date = missing_dates,
+        type = "bandwidth history", flag = "Exit", value = NA),
+        data.frame(date = missing_dates,
+        type = "advertised bandwidth", flag = "Guard", value = NA),
+        data.frame(date = missing_dates,
+        type = "bandwidth history", flag = "Guard", value = NA))
+  }
+  # Explicit levels fix the legend order even when a series has no rows.
+  series <- c("Guard, advertised bandwidth", "Guard, bandwidth history",
+    "Exit, advertised bandwidth", "Exit, bandwidth history")
+  bandwidth <- data.frame(date = bandwidth$date,
+    variable = factor(paste(bandwidth$flag, ", ", bandwidth$type,
+    sep = ""), levels = series), value = bandwidth$value)
+  ggplot(bandwidth, aes(x = as.Date(date, "%Y-%m-%d"), y = value / 2^20,
+      colour = variable)) +
+    geom_line(size = 1) +
+    scale_x_date(name = paste("\nThe Tor Project - ",
+        "https://metrics.torproject.org/", sep = ""),
+        format = breaks$format, major = breaks$major, minor = breaks$minor) +
+    scale_y_continuous(name="Bandwidth (MiB/s)",
+        limits = c(0, max(bandwidth$value, na.rm = TRUE) / 2^20)) +
+    scale_colour_manual(name = "",
+        values = c("#E69F00", "#D6C827", "#009E73", "#00C34F")) +
+    opts(title = paste("Advertised bandwidth and bandwidth history by",
+        "relay flags"), legend.position = "top")
+  ggsave(filename = path, width = 8, height = 5, dpi = as.numeric(dpi))
+}
+
diff --git a/src/org/torproject/ernie/web/GraphDataServlet.java b/src/org/torproject/ernie/web/GraphDataServlet.java
index 11800d7..b574e1c 100644
--- a/src/org/torproject/ernie/web/GraphDataServlet.java
+++ b/src/org/torproject/ernie/web/GraphDataServlet.java
@@ -64,6 +64,8 @@ public class GraphDataServlet extends HttpServlet {
     this.availableGraphDataFiles.put("relay-dir-bandwidth", "dirbytes");
     this.availableGraphDataFiles.put("relay-bandwidth-history-by-flags",
         "bwhist-flags");
+    this.availableGraphDataFiles.put("relay-bandwidth-by-flags",
+        "bandwidth-flags");
     this.availableGraphDataFiles.put("direct-users-by-country",
         "direct-users");
     this.availableGraphDataFiles.put("bridge-users-by-country",
diff --git a/src/org/torproject/ernie/web/RObjectGenerator.java b/src/org/torproject/ernie/web/RObjectGenerator.java
index d8517c1..e4e08bc 100644
--- a/src/org/torproject/ernie/web/RObjectGenerator.java
+++ b/src/org/torproject/ernie/web/RObjectGenerator.java
@@ -54,6 +54,7 @@ public class RObjectGenerator implements ServletContextListener {
     /* Initialize map of available CSV files. */
     this.availableCsvFiles = new TreeSet<String>();
     this.availableCsvFiles.add("bandwidth");
+    this.availableCsvFiles.add("bandwidth-flags");
     this.availableCsvFiles.add("bridge-users");
     this.availableCsvFiles.add("bwhist-flags");
     this.availableCsvFiles.add("connbidirect");
@@ -88,6 +89,7 @@ public class RObjectGenerator implements ServletContextListener {
     this.availableGraphs.put("versions", "start,end,filename,dpi");
     this.availableGraphs.put("platforms", "start,end,filename,dpi");
     this.availableGraphs.put("bandwidth", "start,end,filename,dpi");
+    this.availableGraphs.put("bandwidth-flags", "start,end,filename,dpi");
     this.availableGraphs.put("bwhist-flags", "start,end,filename,dpi");
     this.availableGraphs.put("dirbytes", "start,end,filename,dpi");
     this.availableGraphs.put("direct-users",
diff --git a/web/WEB-INF/network.jsp b/web/WEB-INF/network.jsp
index 04fffef..d73f672 100644
--- a/web/WEB-INF/network.jsp
+++ b/web/WEB-INF/network.jsp
@@ -303,6 +303,41 @@ Exit and/or Guard flags assigned by the directory authorities.</p>
 <p><a href="csv/bwhist-flags.csv">CSV</a> file containing all data.</p>
 <br>
 
+<a name="bandwidth-flags"></a>
+<h3><a href="#bandwidth-flags" class="anchor">Advertised bandwidth and
+bandwidth history by relay flags</a></h3>
+<br>
+<p>The following graph shows the advertised bandwidth and bandwidth
+history of all relays with the Exit and/or Guard flags assigned by the
+directory authorities.
+Note that these sets possibly overlap with relays having both Exit and
+Guard flag.</p>
+<img src="bandwidth-flags.png${bandwidth_flags_url}"
+     width="576" height="360" alt="Advertised bandwidth and bandwidth history by relay flags graph">
+<form action="network.html#bandwidth-flags">
+  <div class="formrow">
+    <input type="hidden" name="graph" value="bandwidth-flags">
+    <p>
+    <label>Start date (yyyy-mm-dd):</label>
+      <input type="text" name="start" size="10"
+             value="<c:choose><c:when test="${fn:length(bandwidth_flags_start) == 0}">${default_start_date}</c:when><c:otherwise>${bandwidth_flags_start[0]}</c:otherwise></c:choose>">
+    <label>End date (yyyy-mm-dd):</label>
+      <input type="text" name="end" size="10"
+             value="<c:choose><c:when test="${fn:length(bandwidth_flags_end) == 0}">${default_end_date}</c:when><c:otherwise>${bandwidth_flags_end[0]}</c:otherwise></c:choose>">
+    </p><p>
+      Resolution: <select name="dpi">
+        <option value="72"<c:if test="${bandwidth_flags_dpi[0] eq '72'}"> selected</c:if>>Screen - 576x360</option>
+        <option value="150"<c:if test="${bandwidth_flags_dpi[0] eq '150'}"> selected</c:if>>Print low - 1200x750</option>
+        <option value="300"<c:if test="${bandwidth_flags_dpi[0] eq '300'}"> selected</c:if>>Print high - 2400x1500</option>
+      </select>
+    </p><p>
+    <input class="submit" type="submit" value="Update graph">
+    </p>
+  </div>
+</form>
+<p><a href="csv/bandwidth-flags.csv">CSV</a> file containing all data.</p>
+<br>
+
 <a name="dirbytes"></a>
 <h3><a href="#dirbytes" class="anchor">Number of bytes spent on answering
 directory requests</a></h3>



More information about the tor-commits mailing list