[or-cvs] [metrics-web/master] Generate .csv files on demand.

karsten at torproject.org
Tue Oct 26 17:22:21 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Tue, 26 Oct 2010 14:01:53 +0200
Subject: Generate .csv files on demand.
Commit: 45cbdaee16934cba96b92c8a2e47dbfb0e5cac92

---
 build.xml                                          |    3 +-
 etc/web.xml                                        |   95 ++++++---
 rserve/csv.R                                       |  238 ++++++++++++++++++++
 src/org/torproject/ernie/web/CsvServlet.java       |  102 +++++++++
 src/org/torproject/ernie/web/GraphGenerator.java   |   78 -------
 .../torproject/ernie/web/GraphImageServlet.java    |   21 +-
 src/org/torproject/ernie/web/RObjectGenerator.java |  139 ++++++++++++
 web/WEB-INF/network.jsp                            |    6 +
 8 files changed, 560 insertions(+), 122 deletions(-)
 create mode 100644 rserve/csv.R
 create mode 100644 src/org/torproject/ernie/web/CsvServlet.java
 delete mode 100644 src/org/torproject/ernie/web/GraphGenerator.java
 create mode 100644 src/org/torproject/ernie/web/RObjectGenerator.java
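
This patch replaces the old per-servlet GraphGenerator with a shared RObjectGenerator (a ServletContextListener) and adds a CsvServlet mapped to /csv/*, so the .csv file behind each graph is produced by Rserve on request rather than pregenerated. For orientation, here is a minimal client-side sketch of fetching one of the new files; the host name is only an assumption, while the /csv/networksize.csv path follows from the servlet mapping and file names added below.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

public class FetchCsvExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical deployment host; the /csv/* mapping and the
    // "networksize" file name come from this patch.
    URL url = new URL("https://metrics.torproject.org/csv/networksize.csv");
    BufferedReader in = new BufferedReader(
        new InputStreamReader(url.openStream()));
    String line;
    while ((line = in.readLine()) != null) {
      // Each line holds a date plus relay and bridge counts.
      System.out.println(line);
    }
    in.close();
  }
}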

diff --git a/build.xml b/build.xml
index dad4dd2..948f7d6 100644
--- a/build.xml
+++ b/build.xml
@@ -18,7 +18,8 @@
            debug="true"
            deprecation="true"
            optimize="false"
-           failonerror="true">
+           failonerror="true"
+           includeantruntime="false">
       <classpath>
         <fileset dir="${libs}"/>
       </classpath>
diff --git a/etc/web.xml b/etc/web.xml
index 5905b53..cc2b3db 100644
--- a/etc/web.xml
+++ b/etc/web.xml
@@ -16,6 +16,7 @@
     <servlet-name>Index</servlet-name>
     <url-pattern>/index.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>Graphs</servlet-name>
     <servlet-class>
@@ -26,6 +27,7 @@
     <servlet-name>Graphs</servlet-name>
     <url-pattern>/graphs.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>GraphsSubpages</servlet-name>
     <servlet-class>
@@ -48,6 +50,7 @@
     <servlet-name>GraphsSubpages</servlet-name>
     <url-pattern>/performance.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>Research</servlet-name>
     <servlet-class>
@@ -58,6 +61,7 @@
     <servlet-name>Research</servlet-name>
     <url-pattern>/research.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>ResearchData</servlet-name>
     <servlet-class>
@@ -80,6 +84,7 @@
     <servlet-name>ResearchData</servlet-name>
     <url-pattern>/data.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>ResearchPapers</servlet-name>
     <servlet-class>
@@ -90,6 +95,7 @@
     <servlet-name>ResearchPapers</servlet-name>
     <url-pattern>/papers.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>ResearchTools</servlet-name>
     <servlet-class>
@@ -100,6 +106,7 @@
     <servlet-name>ResearchTools</servlet-name>
     <url-pattern>/tools.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>Status</servlet-name>
     <servlet-class>
@@ -121,6 +128,7 @@
     <servlet-name>Relay</servlet-name>
     <url-pattern>/relay.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>RelaySearch</servlet-name>
     <servlet-class>
@@ -131,78 +139,73 @@
     <servlet-name>RelaySearch</servlet-name>
     <url-pattern>/relay-search.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <servlet-class>
       org.torproject.ernie.web.GraphImageServlet
     </servlet-class>
-    <init-param>
-      <param-name>rserveHost</param-name>
-      <param-value>localhost</param-value>
-    </init-param>
-    <init-param>
-      <param-name>rservePort</param-name>
-      <param-value>6311</param-value>
-    </init-param>
-    <init-param>
-      <param-name>maxCacheAge</param-name>
-      <param-value>21600</param-value>
-    </init-param>
-    <init-param>
-      <param-name>cachedGraphsDir</param-name>
-      <param-value>
-        /srv/metrics.torproject.org/web/rserve/graphs/
-      </param-value>
-    </init-param>
   </servlet>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/networksize.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/relayflags.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/relayflags-hour.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/versions.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/platforms.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/bandwidth.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/dirbytes.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/new-users.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/direct-users.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/bridge-users.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/gettor.png</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>GraphImageServlet</servlet-name>
+    <servlet-name>GraphImage</servlet-name>
     <url-pattern>/torperf.png</url-pattern>
   </servlet-mapping>
+
+  <servlet>
+    <servlet-name>Csv</servlet-name>
+    <servlet-class>
+      org.torproject.ernie.web.CsvServlet
+    </servlet-class>
+  </servlet>
+  <servlet-mapping>
+    <servlet-name>Csv</servlet-name>
+    <url-pattern>/csv/*</url-pattern>
+  </servlet-mapping>
+
   <servlet>
     <servlet-name>ExoneraTor</servlet-name>
     <servlet-class>
@@ -213,6 +216,7 @@
     <servlet-name>ExoneraTor</servlet-name>
     <url-pattern>/exonerator.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>ServerDescriptor</servlet-name>
     <servlet-class>
@@ -223,6 +227,7 @@
     <servlet-name>ServerDescriptor</servlet-name>
     <url-pattern>/serverdesc</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>ExtraInfoDescriptor</servlet-name>
     <servlet-class>
@@ -233,6 +238,7 @@
     <servlet-name>ExtraInfoDescriptor</servlet-name>
     <url-pattern>/extrainfodesc</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>Descriptor</servlet-name>
     <servlet-class>
@@ -243,6 +249,7 @@
     <servlet-name>Descriptor</servlet-name>
     <url-pattern>/descriptor.html</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>Consensus</servlet-name>
     <servlet-class>
@@ -253,6 +260,7 @@
     <servlet-name>Consensus</servlet-name>
     <url-pattern>/consensus</url-pattern>
   </servlet-mapping>
+
   <servlet>
     <servlet-name>ErnieGeneratedFile</servlet-name>
     <servlet-class>
@@ -343,5 +351,30 @@
     <res-auth>Container</res-auth>
   </resource-ref>
 
+  <context-param>
+    <param-name>rserveHost</param-name>
+    <param-value>localhost</param-value>
+  </context-param>
+  <context-param>
+    <param-name>rservePort</param-name>
+    <param-value>6311</param-value>
+  </context-param>
+  <context-param>
+    <param-name>maxCacheAge</param-name>
+    <param-value>21600</param-value>
+  </context-param>
+  <context-param>
+    <param-name>cachedGraphsDir</param-name>
+    <param-value>
+      /srv/metrics.torproject.org/web/rserve/graphs/
+    </param-value>
+  </context-param>
+
+  <listener>
+    <listener-class>
+      org.torproject.ernie.web.RObjectGenerator
+    </listener-class>
+  </listener>
+
 </web-app>
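
The web.xml changes above replace the GraphImageServlet init-params with application-wide context-params and register RObjectGenerator as a listener, which publishes itself as the "RObjectGenerator" context attribute. The following is a condensed sketch of how a servlet consumes that wiring; it mirrors the CsvServlet and GraphImageServlet code further down, and only the servlet name here is invented for illustration.

package org.torproject.ernie.web;

import java.io.IOException;
import javax.servlet.ServletException;
import javax.servlet.http.*;

// Hypothetical servlet, shown only to illustrate the new wiring: the
// RObjectGenerator listener has already read rserveHost, rservePort,
// maxCacheAge, and cachedGraphsDir from the <context-param> entries and
// stored itself under the "RObjectGenerator" attribute.
public class ExampleCsvClientServlet extends HttpServlet {
  private RObjectGenerator rObjectGenerator;

  public void init() {
    this.rObjectGenerator = (RObjectGenerator) getServletContext()
        .getAttribute("RObjectGenerator");
  }

  public void doGet(HttpServletRequest request,
      HttpServletResponse response) throws IOException, ServletException {
    // Ask the shared generator to have Rserve write the CSV file and
    // return its contents as a string.
    String csv = this.rObjectGenerator.generateCsv(
        "export_networksize(path = '%s')", "networksize.csv");
    if (csv == null) {
      response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
      return;
    }
    response.setContentType("text/csv");
    response.getWriter().print(csv);
  }
}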
 
diff --git a/rserve/csv.R b/rserve/csv.R
new file mode 100644
index 0000000..2c27604
--- /dev/null
+++ b/rserve/csv.R
@@ -0,0 +1,238 @@
+export_networksize <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- "SELECT date, avg_running AS relays FROM network_size"
+  rs <- dbSendQuery(con, q)
+  relays <- fetch(rs, n = -1)
+  q <- "SELECT date, avg_running AS bridges FROM bridge_network_size"
+  rs <- dbSendQuery(con, q)
+  bridges <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  networksize <- rbind(melt(relays, "date"), melt(bridges, "date"))
+  networksize <- cast(networksize, date ~ variable)
+  networksize <- networksize[order(networksize$date), ]
+  write.csv(networksize, path, quote = FALSE, row.names = FALSE)
+}
+
+export_versions <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- "SELECT date, version, relays FROM relay_versions"
+  rs <- dbSendQuery(con, q)
+  versions <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  versions <- cast(versions, date ~ version, value = "relays")
+  versions <- versions[order(versions$date), ]
+  write.csv(versions, path, quote = FALSE, row.names = FALSE)
+}
+
+export_platforms <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT date, avg_linux AS linux, avg_darwin AS darwin,",
+      "avg_bsd AS bsd, avg_windows AS windows, avg_other AS other",
+      "FROM relay_platforms ORDER BY date")
+  rs <- dbSendQuery(con, q)
+  platforms <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  write.csv(platforms, path, quote = FALSE, row.names = FALSE)
+}
+
+export_bandwidth <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- "SELECT date, bwadvertised FROM total_bandwidth"
+  rs <- dbSendQuery(con, q)
+  bw_desc <- fetch(rs, n = -1)
+  q <- paste("SELECT date, read, written FROM total_bwhist",
+      "WHERE date < (SELECT MAX(date) FROM total_bwhist) - 1")
+  rs <- dbSendQuery(con, q)
+  bw_hist <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  bandwidth <- rbind(data.frame(date = bw_desc$date,
+      value = bw_desc$bwadvertised, variable = "bwadv"),
+    data.frame(date = bw_hist$date, value = floor((bw_hist$read +
+      bw_hist$written) / (2 * 86400)), variable = "bwhist"))
+  bandwidth <- cast(bandwidth, date ~ variable, value = "value")
+  bandwidth <- bandwidth[order(bandwidth$date), ]
+  write.csv(bandwidth, path, quote = FALSE, row.names = FALSE)
+}
+
+export_dirbytes <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT date, read / 86400 AS read,",
+      "written / 86400 AS written, dirread / 86400 AS dirread,",
+      "dirwritten / 86400 AS dirwritten FROM total_bwhist",
+      "WHERE date < (SELECT MAX(date) FROM total_bwhist) - 1",
+      "ORDER BY date")
+  rs <- dbSendQuery(con, q)
+  bw_hist <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  write.csv(bw_hist, path, quote = FALSE, row.names = FALSE)
+}
+
+export_relayflags <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT date, avg_running AS running, avg_exit AS exit,",
+      "avg_guard AS guard, avg_fast AS fast, avg_stable AS stable",
+      "FROM network_size ORDER BY date")
+  rs <- dbSendQuery(con, q)
+  relayflags <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  write.csv(relayflags, path, quote = FALSE, row.names = FALSE)
+}
+
+export_relayflags_hour <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT validafter, avg_running AS running,",
+      "avg_exit AS exit, avg_guard AS guard, avg_fast AS fast,",
+      "avg_stable AS stable FROM network_size_hour ORDER BY validafter")
+  rs <- dbSendQuery(con, q)
+  relayflags <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  write.csv(relayflags, path, quote = FALSE, row.names = FALSE)
+}
+
+export_new_users <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT date, country, 6 * requests AS newusers",
+      "FROM dirreq_stats",
+      "WHERE source = '68333D0761BCF397A587A0C0B963E4A9E99EC4D3'",
+      "OR source = 'F2044413DAC2E02E3D6BCF4735A19BCA1DE97281'",
+      "ORDER BY date, country")
+  rs <- dbSendQuery(con, q)
+  newusers <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  newusers <- cast(newusers, date ~ country, value = "newusers")
+  names(newusers)[names(newusers) == "zy"] <- "all"
+  write.csv(newusers, path, quote = FALSE, row.names = FALSE)
+}
+
+export_direct_users <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT date, country,",
+      "FLOOR(10 * requests / share) AS directusers",
+      "FROM dirreq_stats WHERE share >= 1",
+      "AND source = '8522EB98C91496E80EC238E732594D1509158E77'",
+      "ORDER BY date, country")
+  rs <- dbSendQuery(con, q)
+  directusers <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  directusers <- cast(directusers, date ~ country, value = "directusers")
+  names(directusers)[names(directusers) == "zy"] <- "all"
+  write.csv(directusers, path, quote = FALSE, row.names = FALSE)
+}
+
+export_bridge_users <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT date, country, users AS bridgeusers",
+      "FROM bridge_stats",
+      "WHERE date < (SELECT MAX(date) FROM bridge_stats)",
+      "ORDER BY date, country")
+  rs <- dbSendQuery(con, q)
+  bridgeusers <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  bridgeusers <- cast(bridgeusers, date ~ country, value = "bridgeusers")
+  names(bridgeusers)[names(bridgeusers) == "zy"] <- "all"
+  write.csv(bridgeusers, path, quote = FALSE, row.names = FALSE)
+}
+
+export_gettor <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- "SELECT date, bundle, downloads FROM gettor_stats"
+  rs <- dbSendQuery(con, q)
+  downloads <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  downloads_total <- downloads[downloads$bundle != "none", ]
+  downloads_total <- aggregate(downloads_total$downloads,
+      by = list(date = downloads_total$date), sum)
+  downloads_en <- downloads[grep("*_en", downloads$bundle), ]
+  downloads_en <- aggregate(downloads_en$downloads,
+      by = list(date = downloads_en$date), sum)
+  downloads_zh_cn <- downloads[grep("*_zh_cn", downloads$bundle), ]
+  downloads_zh_cn <- aggregate(downloads_zh_cn$downloads,
+      by = list(date = downloads_zh_cn$date), sum)
+  downloads_fa <- downloads[grep("*_fa", downloads$bundle), ]
+  downloads_fa <- aggregate(downloads_fa$downloads,
+      by = list(date = downloads_fa$date), sum)
+  downloads <- rbind(
+      data.frame(date = downloads_total$date,
+        bundle = "total", downloads = downloads_total$x),
+      data.frame(date = downloads_en$date,
+        bundle = "en", downloads = downloads_en$x),
+      data.frame(date = downloads_zh_cn$date,
+        bundle = "zh_cn", downloads = downloads_zh_cn$x),
+      data.frame(date = downloads_fa$date,
+        bundle = "fa", downloads = downloads_fa$x))
+  downloads <- cast(downloads, date ~ bundle, value = "downloads")
+  downloads <- downloads[order(downloads$date), ]
+  write.csv(downloads, path, quote = FALSE, row.names = FALSE)
+}
+
+export_torperf <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT source, date, q1, md, q3 FROM torperf_stats",
+      "ORDER BY source, date")
+  rs <- dbSendQuery(con, q)
+  torperf <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  write.csv(torperf, path, quote = FALSE, row.names = FALSE)
+}
+
+help_export_monthly_users <- function(path, aggr_fun) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT date, country,",
+      "FLOOR(10 * requests / share) AS users",
+      "FROM dirreq_stats WHERE share >= 1",
+      "AND source = '8522EB98C91496E80EC238E732594D1509158E77'",
+      "ORDER BY date, country")
+  rs <- dbSendQuery(con, q)
+  trusted <- fetch(rs, n = -1)
+  q <- paste("SELECT date, country, FLOOR(users) AS users",
+      "FROM bridge_stats",
+      "WHERE date < (SELECT MAX(date) FROM bridge_stats)",
+      "ORDER BY date, country")
+  rs <- dbSendQuery(con, q)
+  bridge <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  users <- rbind(bridge, trusted)
+  users <- aggregate(users$users,
+      by = list(date = users$date, country = users$country), sum)
+  users <- aggregate(users$x, by = list(month = substr(users$date, 1, 7),
+      country = users$country), aggr_fun)
+  users <- cast(users, country ~ month, value = "x")
+  users[users$country == "zy", 1] <- "all"
+  users[, 2:length(users)] <- floor(users[, 2:length(users)])
+  write.csv(users, path, quote = FALSE, row.names = FALSE)
+}
+
+export_monthly_users_peak <- function(path) {
+  help_export_monthly_users(path, max)
+}
+
+export_monthly_users_average <- function(path) {
+  help_export_monthly_users(path, mean)
+}
+
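
The export_* functions above run inside the Rserve session and only take an output path; they rely on the database settings (db, dbuser, dbpassword), the PostgreSQL driver, and the reshape functions (melt, cast) already being available in that session, presumably via the existing Rserve setup. On the Java side, each function is invoked by formatting a one-line R call and evaluating it over an RConnection, as RObjectGenerator.generateCsv() does further down. A minimal sketch of that round trip, with placeholder host, port, and output path:

import org.rosuda.REngine.Rserve.RConnection;
import org.rosuda.REngine.Rserve.RserveException;

public class RserveCsvExample {
  public static void main(String[] args) throws RserveException {
    // Placeholder host, port, and path; the real values come from the
    // rserveHost, rservePort, and cachedGraphsDir context-params.
    RConnection rc = new RConnection("localhost", 6311);
    // Ask the Rserve session (assumed to have loaded csv.R) to write the
    // file; the R function queries PostgreSQL and writes the CSV itself.
    rc.eval("export_networksize(path = '/tmp/networksize.csv')");
    rc.close();
  }
}
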
diff --git a/src/org/torproject/ernie/web/CsvServlet.java b/src/org/torproject/ernie/web/CsvServlet.java
new file mode 100644
index 0000000..eff5bec
--- /dev/null
+++ b/src/org/torproject/ernie/web/CsvServlet.java
@@ -0,0 +1,102 @@
+package org.torproject.ernie.web;
+
+import java.io.*;
+import java.text.*;
+import java.util.*;
+import java.util.logging.*;
+
+import javax.servlet.*;
+import javax.servlet.http.*;
+
+/**
+ * Servlet that reads an HTTP request for a comma-separated value file,
+ * asks the RObjectGenerator to generate this file, and returns it to the
+ * client.
+ */
+public class CsvServlet extends HttpServlet {
+
+  private RObjectGenerator rObjectGenerator;
+
+  /* Available CSV files. */
+  private Set<String> availableCsvFiles;
+
+  private Logger logger;
+
+  public void init() {
+
+    /* Initialize logger. */
+    this.logger = Logger.getLogger(CsvServlet.class.toString());
+
+    /* Initialize set of available CSV files. */
+    this.availableCsvFiles = new HashSet<String>();
+    this.availableCsvFiles.add("bandwidth");
+    this.availableCsvFiles.add("bridge-users");
+    this.availableCsvFiles.add("direct-users");
+    this.availableCsvFiles.add("dirbytes");
+    this.availableCsvFiles.add("gettor");
+    this.availableCsvFiles.add("monthly-users-average");
+    this.availableCsvFiles.add("monthly-users-peak");
+    this.availableCsvFiles.add("networksize");
+    this.availableCsvFiles.add("new-users");
+    this.availableCsvFiles.add("platforms");
+    this.availableCsvFiles.add("relayflags");
+    this.availableCsvFiles.add("relayflags-hour");
+    this.availableCsvFiles.add("torperf");
+    this.availableCsvFiles.add("versions");
+
+    /* Get a reference to the R object generator that we need to generate
+     * CSV files. */
+    this.rObjectGenerator = (RObjectGenerator) getServletContext().
+        getAttribute("RObjectGenerator");
+  }
+
+  public void doGet(HttpServletRequest request,
+      HttpServletResponse response) throws IOException,
+      ServletException {
+
+    /* Find out which CSV file was requested and make sure we know this
+     * CSV file type. */
+    String requestURI = request.getRequestURI();
+    String requestedCsvFile = requestURI;
+    if (requestedCsvFile.endsWith(".csv")) {
+      requestedCsvFile = requestedCsvFile.substring(0,
+          requestedCsvFile.length() - ".csv".length());
+    }
+    if (requestedCsvFile.contains("/")) {
+      requestedCsvFile = requestedCsvFile.substring(requestedCsvFile.
+          lastIndexOf("/") + 1);
+    }
+    if (!availableCsvFiles.contains(requestedCsvFile)) {
+      logger.info("Did not recognize requested .csv file from request "
+          + "URI: '" + requestURI + "'. Responding with 404 Not Found.");
+      response.sendError(HttpServletResponse.SC_NOT_FOUND);
+      return;
+    }
+    logger.fine("CSV file '" + requestedCsvFile + ".csv' requested.");
+
+    /* Prepare filename and R query string. */
+    String rQuery = "export_" + requestedCsvFile.replaceAll("-", "_")
+        + "(path = '%s')";
+    String csvFilename = requestedCsvFile + ".csv";
+
+    /* Request CSV file from R object generator, which asks Rserve to
+     * generate it. */
+    String csvFileContent = this.rObjectGenerator.generateCsv(rQuery,
+        csvFilename);
+
+    /* Make sure that we have a CSV file to return. */
+    if (csvFileContent == null) {
+      response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+      return;
+    }
+
+    /* Write CSV file to response. */
+    response.setContentType("text/csv");
+    response.setHeader("Content-Length", String.valueOf(
+        csvFileContent.length()));
+    response.setHeader("Content-Disposition",
+        "inline; filename=\"" + csvFilename + "\"");
+    response.getWriter().print(csvFileContent);
+  }
+}
+
diff --git a/src/org/torproject/ernie/web/GraphGenerator.java b/src/org/torproject/ernie/web/GraphGenerator.java
deleted file mode 100644
index 2b59e2e..0000000
--- a/src/org/torproject/ernie/web/GraphGenerator.java
+++ /dev/null
@@ -1,78 +0,0 @@
-package org.torproject.ernie.web;
-
-import java.io.*;
-import java.util.*;
-
-import org.rosuda.REngine.Rserve.*;
-import org.rosuda.REngine.*;
-
-public class GraphGenerator {
-
-  /* Host and port where Rserve is listening. */
-  private String rserveHost;
-  private int rservePort;
-
-  /* Some parameters for our cache of graph images. */
-  private String cachedGraphsDirectory;
-  private long maxCacheAge;
-
-  public GraphGenerator(String rserveHost, String rservePort,
-      String maxCacheAge, String cachedGraphsDir) {
-
-    this.rserveHost = rserveHost;
-    this.rservePort = Integer.parseInt(rservePort);
-    this.cachedGraphsDirectory = cachedGraphsDir;
-    this.maxCacheAge = Long.parseLong(maxCacheAge);
-  }
-
-  /* Generate a graph using the given R query that has a placeholder for
-   * the absolute path to the image to be created. */
-  public byte[] generateGraph(String rQuery, String imageFilename) {
-
-    /* See if we need to generate this graph. */
-    File imageFile = new File(this.cachedGraphsDirectory + "/"
-        + imageFilename);
-    long now = System.currentTimeMillis();
-    if (!imageFile.exists() || imageFile.lastModified() < now
-        - this.maxCacheAge * 1000L) {
-
-      /* We do. Update the R query to contain the absolute path to the file
-       * to be generated, create a connection to Rserve, run the R query,
-       * and close the connection. The generated graph will be on disk. */
-      rQuery = String.format(rQuery, imageFile.getAbsolutePath());
-      try {
-        RConnection rc = new RConnection(rserveHost, rservePort);
-        rc.eval(rQuery);
-        rc.close();
-      } catch (RserveException e) {
-        return null;
-      }
-
-      /* Check that we really just generated the file */
-      if (!imageFile.exists() || imageFile.lastModified() < now
-          - this.maxCacheAge * 1000L) {
-        return null;
-      }
-    }
-
-    /* Read the image from disk and write it to a byte array. */
-    byte[] result = null;
-    try {
-      BufferedInputStream bis = new BufferedInputStream(
-          new FileInputStream(imageFile), 1024);
-      ByteArrayOutputStream baos = new ByteArrayOutputStream();
-      byte[] buffer = new byte[1024];
-      int length;
-      while ((length = bis.read(buffer)) > 0) {
-        baos.write(buffer, 0, length);
-      }
-      result = baos.toByteArray();
-    } catch (IOException e) {
-      return null;
-    }
-
-    /* Return the graph bytes. */
-    return result;
-  }
-}
-
diff --git a/src/org/torproject/ernie/web/GraphImageServlet.java b/src/org/torproject/ernie/web/GraphImageServlet.java
index 819ef71..db9eaaa 100644
--- a/src/org/torproject/ernie/web/GraphImageServlet.java
+++ b/src/org/torproject/ernie/web/GraphImageServlet.java
@@ -8,22 +8,19 @@ import javax.servlet.http.*;
 
 /**
  * Servlet that reads an HTTP request for a graph image, asks the
- * GraphGenerator to generate this graph if it's not in the cache, and
+ * RObjectGenerator to generate this graph if it's not in the cache, and
  * returns the image bytes to the client.
  */
 public class GraphImageServlet extends HttpServlet {
 
-  private GraphGenerator graphGenerator;
+  private RObjectGenerator rObjectGenerator;
 
   public void init() {
-    ServletConfig servletConfig = getServletConfig();
-    String rserveHost = servletConfig.getInitParameter("rserveHost");
-    String rservePort = servletConfig.getInitParameter("rservePort");
-    String maxCacheAge = servletConfig.getInitParameter("maxCacheAge");
-    String cachedGraphsDir = servletConfig.getInitParameter(
-        "cachedGraphsDir");
-    this.graphGenerator = new GraphGenerator(rserveHost, rservePort,
-        maxCacheAge, cachedGraphsDir);
+
+    /* Get a reference to the R object generator that we need to generate
+     * graph images. */
+    this.rObjectGenerator = (RObjectGenerator) getServletContext().
+        getAttribute("RObjectGenerator");
   }
 
   public void doGet(HttpServletRequest request,
@@ -79,9 +76,9 @@ public class GraphImageServlet extends HttpServlet {
     rQueryBuilder.append("path = '%s')");
     String rQuery = rQueryBuilder.toString();
 
-    /* Request graph from graph controller, which either returns it from
+    /* Request graph from R object generator, which either returns it from
      * its cache or asks Rserve to generate it. */
-    byte[] graphBytes = graphGenerator.generateGraph(rQuery,
+    byte[] graphBytes = rObjectGenerator.generateGraph(rQuery,
         imageFilename);
 
     /* Make sure that we have a graph to return. */
diff --git a/src/org/torproject/ernie/web/RObjectGenerator.java b/src/org/torproject/ernie/web/RObjectGenerator.java
new file mode 100644
index 0000000..7f152dd
--- /dev/null
+++ b/src/org/torproject/ernie/web/RObjectGenerator.java
@@ -0,0 +1,139 @@
+package org.torproject.ernie.web;
+
+import java.io.*;
+import java.util.*;
+import java.util.logging.*;
+
+import javax.servlet.*;
+
+import org.rosuda.REngine.Rserve.*;
+import org.rosuda.REngine.*;
+
+public class RObjectGenerator implements ServletContextListener {
+
+  /* Host and port where Rserve is listening. */
+  private String rserveHost;
+  private int rservePort;
+
+  /* Some parameters for our cache of graph images. */
+  private String cachedGraphsDirectory;
+  private long maxCacheAge;
+
+  private Logger logger;
+
+  public void contextInitialized(ServletContextEvent event) {
+
+    /* Initialize logger. */
+    this.logger = Logger.getLogger(RObjectGenerator.class.toString());
+
+    /* Initialize using context parameters. */
+    ServletContext servletContext = event.getServletContext();
+    this.rserveHost = servletContext.getInitParameter("rserveHost");
+    this.rservePort = Integer.parseInt(servletContext.getInitParameter(
+        "rservePort"));
+    this.maxCacheAge = Long.parseLong(servletContext.getInitParameter(
+        "maxCacheAge"));
+    this.cachedGraphsDirectory = servletContext.getInitParameter(
+        "cachedGraphsDir");
+
+    /* Register ourself, so that servlets can use us. */
+    servletContext.setAttribute("RObjectGenerator", this);
+  }
+
+  public void contextDestroyed(ServletContextEvent event) {
+    /* Nothing to do. */
+  }
+
+  /* Generate a graph using the given R query that has a placeholder for
+   * the absolute path to the image to be created. */
+  public byte[] generateGraph(String rQuery, String imageFilename) {
+
+    /* See if we need to generate this graph. */
+    File imageFile = new File(this.cachedGraphsDirectory + "/"
+        + imageFilename);
+    long now = System.currentTimeMillis();
+    if (!imageFile.exists() || imageFile.lastModified() < now
+        - this.maxCacheAge * 1000L) {
+
+      /* We do. Update the R query to contain the absolute path to the file
+       * to be generated, create a connection to Rserve, run the R query,
+       * and close the connection. The generated graph will be on disk. */
+      rQuery = String.format(rQuery, imageFile.getAbsolutePath());
+      try {
+        RConnection rc = new RConnection(rserveHost, rservePort);
+        rc.eval(rQuery);
+        rc.close();
+      } catch (RserveException e) {
+        return null;
+      }
+
+      /* Check that we really just generated the file */
+      if (!imageFile.exists() || imageFile.lastModified() < now
+          - this.maxCacheAge * 1000L) {
+        return null;
+      }
+    }
+
+    /* Read the image from disk and write it to a byte array. */
+    byte[] result = null;
+    try {
+      BufferedInputStream bis = new BufferedInputStream(
+          new FileInputStream(imageFile), 1024);
+      ByteArrayOutputStream baos = new ByteArrayOutputStream();
+      byte[] buffer = new byte[1024];
+      int length;
+      while ((length = bis.read(buffer)) > 0) {
+        baos.write(buffer, 0, length);
+      }
+      result = baos.toByteArray();
+    } catch (IOException e) {
+      return null;
+    }
+
+    /* Return the graph bytes. */
+    return result;
+  }
+
+  /* Generate a comma-separated value file using the given R query that
+   * has a placeholder for the absolute path to the file to be created. */
+  public String generateCsv(String rQuery, String csvFilename) {
+
+    /* Update the R query to contain the absolute path to the file to be
+     * generated, create a connection to Rserve, run the R query, and
+     * close the connection. The generated csv file will be on disk in the
+     * same directory as the generated graphs. */
+    File csvFile = new File(this.cachedGraphsDirectory + "/"
+        + csvFilename);
+    rQuery = String.format(rQuery, csvFile.getAbsolutePath());
+    try {
+      RConnection rc = new RConnection(rserveHost, rservePort);
+      rc.eval(rQuery);
+      rc.close();
+    } catch (RserveException e) {
+      return null;
+    }
+
+    /* Check that we really just generated the file */
+    if (!csvFile.exists()) {
+      return null;
+    }
+
+    /* Read the text file from disk and write it to a string. */
+    String result = null;
+    try {
+      StringBuilder sb = new StringBuilder();
+      BufferedReader br = new BufferedReader(new FileReader(csvFile));
+      String line = null;
+      while ((line = br.readLine()) != null) {
+        sb.append(line + "\n");
+      }
+      result = sb.toString();
+    } catch (IOException e) {
+      return null;
+    }
+
+    /* Return the csv file. */
+    return result;
+  }
+}
+
diff --git a/web/WEB-INF/network.jsp b/web/WEB-INF/network.jsp
index 9e2a08b..b154665 100644
--- a/web/WEB-INF/network.jsp
+++ b/web/WEB-INF/network.jsp
@@ -68,6 +68,7 @@ average number of relays with these flags assigned.</p>
     </p>
   </div>
 </form>
+<p><a href="csv/relayflags.csv">CSV</a> file containing all data.</p>
 <br>
 
 <h3>Relays with Exit, Fast, Guard, and Stable flags on 1-hour detail</h3>
@@ -99,6 +100,7 @@ available on 1-hour detail.</p>
     </p>
   </div>
 </form>
+<p><a href="csv/relayflags-hour.csv">CSV</a> file containing all data.</p>
 <br>
 
 <h3>Relays by version</h3>
@@ -124,6 +126,7 @@ version.</p>
     </p>
   </div>
 </form>
+<p><a href="csv/versions.csv">CSV</a> file containing all data.</p>
 <br>
 
 <h3>Relays by platform</h3>
@@ -149,6 +152,7 @@ platform.</p>
     </p>
   </div>
 </form>
+<p><a href="csv/platforms.csv">CSV</a> file containing all data.</p>
 <br>
 
 <h3>Total relay bandwidth in the network</h3>
@@ -175,6 +179,7 @@ in the network.</p>
     </p>
   </div>
 </form>
+<p><a href="csv/bandwidth.csv">CSV</a> file containing all data.</p>
 <br>
 
 <h3>Number of bytes spent on answering directory requests</h3>
@@ -202,6 +207,7 @@ the number of written and read dir bytes by all relays.</p>
     </p>
   </div>
 </form>
+<p><a href="csv/dirbytes.csv">CSV</a> file containing all data.</p>
 <br>
     </div>
   </div>
-- 
1.7.1


