[tor-commits] [metrics-web/master] Remove GetTor statistics processing code.

karsten at torproject.org karsten at torproject.org
Tue Aug 7 11:15:10 UTC 2012


commit 27869c49223db85aa05baad14ffc619708d9e661
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Aug 7 12:33:09 2012 +0200

    Remove GetTor statistics processing code.
---
 README                                             |   25 +---
 config.template                                    |    6 -
 db/tordir.sql                                      |    9 --
 rserve/csv.R                                       |   35 -----
 rserve/graphs.R                                    |   39 -----
 src/org/torproject/ernie/cron/Configuration.java   |   13 --
 src/org/torproject/ernie/cron/GetTorProcessor.java |  152 --------------------
 src/org/torproject/ernie/cron/Main.java            |    7 -
 src/org/torproject/ernie/web/GraphDataServlet.java |    1 -
 src/org/torproject/ernie/web/RObjectGenerator.java |    2 -
 10 files changed, 1 insertions(+), 288 deletions(-)

diff --git a/README b/README
index 8b3651f..1c551e0 100644
--- a/README
+++ b/README
@@ -20,8 +20,7 @@ or only the database and the graphing engine, if desired.
 
 The metrics database contains data about the Tor Network coming from
 different sources, including the Tor directory authorities, Torperf
-performance measurement installations, the GetTor software package
-delivery service, and others.
+performance measurement installations, and others.
 
 
 1.1. Preparing the operating system
@@ -289,28 +288,6 @@ Run the database import:
 $ ./run.sh
 
 
-1.10. Importing GetTor statistics
-=================================
-
-WARNING: The GetTor statistics are not available for download yet, so that
-this section only applies to the official metrics website.
-
-GetTor is a software distribution service that allows users to fetch the
-Tor software via email.  GetTor produces daily statistics of requested
-packages that can be imported into the metrics database.
-
-Put the GetTor statistics file into /srv/metrics-web/gettor/ .
-
-Edit /srv/metrics-web/config to contain the following options:
-
-ProcessGetTorStats 1
-GetTorDirectory gettor/
-
-Run the database import:
-
-$ ./run.sh
-
-
 2. Installing the graphing engine
 =================================
 
diff --git a/config.template b/config.template
index 528b36f..0491431 100644
--- a/config.template
+++ b/config.template
@@ -45,12 +45,6 @@
 ## Relative path to directory to import torperf results from
 #TorperfDirectory in/torperf/
 #
-## Process GetTor stats and import them into the database
-#ProcessGetTorStats 0
-#
-## Relative path to directory where to find GetTor stats
-#GetTorDirectory in/gettor/
-#
 ## JDBC string for ExoneraTor database
 #ExoneraTorDatabaseJdbc jdbc:postgresql://localhost/exonerator?user=metrics&password=password
 #
diff --git a/db/tordir.sql b/db/tordir.sql
index 94c9320..93820bc 100644
--- a/db/tordir.sql
+++ b/db/tordir.sql
@@ -899,15 +899,6 @@ CREATE TABLE torperf_stats (
     CONSTRAINT torperf_stats_pkey PRIMARY KEY("date", source)
 );
 
--- TABLE gettor_stats
--- Packages requested from GetTor
-CREATE TABLE gettor_stats (
-    "date" DATE NOT NULL,
-    bundle CHARACTER VARYING(64) NOT NULL,
-    downloads INTEGER NOT NULL,
-    CONSTRAINT gettor_stats_pkey PRIMARY KEY("date", bundle)
-);
-
 -- Refresh all statistics in the database.
 CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
   BEGIN
diff --git a/rserve/csv.R b/rserve/csv.R
index 280d7b6..eec76d9 100644
--- a/rserve/csv.R
+++ b/rserve/csv.R
@@ -182,41 +182,6 @@ export_bridge_users <- function(path) {
   write.csv(bridgeusers, path, quote = FALSE, row.names = FALSE)
 }
 
-export_gettor <- function(path) {
-  drv <- dbDriver("PostgreSQL")
-  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
-  q <- paste("SELECT date, bundle, downloads FROM gettor_stats",
-      "WHERE date < current_date - 1")
-  rs <- dbSendQuery(con, q)
-  downloads <- fetch(rs, n = -1)
-  dbDisconnect(con)
-  dbUnloadDriver(drv)
-  downloads_total <- downloads[downloads$bundle != "none", ]
-  downloads_total <- aggregate(downloads_total$downloads,
-      by = list(date = downloads_total$date), sum)
-  downloads_en <- downloads[grep("*_en", downloads$bundle), ]
-  downloads_en <- aggregate(downloads_en$downloads,
-      by = list(date = downloads_en$date), sum)
-  downloads_zh_cn <- downloads[grep("*_zh_cn", downloads$bundle), ]
-  downloads_zh_cn <- aggregate(downloads_zh_cn$downloads,
-      by = list(date = downloads_zh_cn$date), sum)
-  downloads_fa <- downloads[grep("*_fa", downloads$bundle), ]
-  downloads_fa <- aggregate(downloads_fa$downloads,
-      by = list(date = downloads_fa$date), sum)
-  downloads <- rbind(
-      data.frame(date = downloads_total$date,
-        bundle = "total", downloads = downloads_total$x),
-      data.frame(date = downloads_en$date,
-        bundle = "en", downloads = downloads_en$x),
-      data.frame(date = downloads_zh_cn$date,
-        bundle = "zh_cn", downloads = downloads_zh_cn$x),
-      data.frame(date = downloads_fa$date,
-        bundle = "fa", downloads = downloads_fa$x))
-  downloads <- cast(downloads, date ~ bundle, value = "downloads")
-  downloads <- downloads[order(downloads$date), ]
-  write.csv(downloads, path, quote = FALSE, row.names = FALSE)
-}
-
 export_torperf <- function(path) {
   drv <- dbDriver("PostgreSQL")
   con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
diff --git a/rserve/graphs.R b/rserve/graphs.R
index a441d86..87917aa 100644
--- a/rserve/graphs.R
+++ b/rserve/graphs.R
@@ -781,45 +781,6 @@ plot_bridge_users <- function(start, end, country, path, dpi) {
   ggsave(filename = path, width = 8, height = 5, dpi = as.numeric(dpi))
 }
 
-plot_gettor <- function(start, end, language, path, dpi) {
-  drv <- dbDriver("PostgreSQL")
-  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
-  condition <- ifelse(language == "all", "<> 'none'",
-      paste("LIKE '%_", tolower(language), "'", sep = ""))
-  q <- paste("SELECT date, SUM(downloads) AS downloads ",
-      "FROM gettor_stats WHERE bundle ", condition, " AND date >= '",
-      start, "' AND date <= '", end,
-      "' AND date < current_date - 1 GROUP BY date", sep = "")
-  rs <- dbSendQuery(con, q)
-  downloads <- fetch(rs, n = -1)
-  dbDisconnect(con)
-  dbUnloadDriver(drv)
-  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
-      to = as.Date(end, "%Y-%m-%d"), by="1 day")
-  missing <- setdiff(dates, downloads$date)
-  if (length(missing) > 0)
-    downloads <- rbind(downloads,
-        data.frame(date = as.Date(missing, origin = "1970-01-01"),
-        downloads = NA))
-  title <- ifelse(language == "all",
-    "Total packages requested from GetTor per day\n",
-    paste(languagename(language), " (", language,
-        ") packages requested from GetTor per day\n", sep = ""))
-  date_breaks <- date_breaks(
-    as.numeric(max(as.Date(downloads$date, "%Y-%m-%d")) -
-    min(as.Date(downloads$date, "%Y-%m-%d"))))
-  ggplot(downloads, aes(x = as.Date(date, "%Y-%m-%d"), y = downloads)) +
-    geom_line(size = 1) +
-    scale_x_date(name = paste("\nThe Tor Project - ",
-        "https://metrics.torproject.org/", sep = ""),
-        format = date_breaks$format, major = date_breaks$major,
-        minor = date_breaks$minor) +
-    scale_y_continuous(name = "", limits = c(0, max(downloads$downloads,
-        na.rm = TRUE))) +
-    opts(title = title)
-  ggsave(filename = path, width = 8, height = 5, dpi = as.numeric(dpi))
-}
-
 plot_torperf <- function(start, end, source, filesize, path, dpi) {
   drv <- dbDriver("PostgreSQL")
   con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
diff --git a/src/org/torproject/ernie/cron/Configuration.java b/src/org/torproject/ernie/cron/Configuration.java
index 5cdadaf..878e882 100644
--- a/src/org/torproject/ernie/cron/Configuration.java
+++ b/src/org/torproject/ernie/cron/Configuration.java
@@ -30,8 +30,6 @@ public class Configuration {
   private boolean writeBridgeStats = false;
   private boolean importWriteTorperfStats = false;
   private String torperfDirectory = "in/torperf/";
-  private boolean processGetTorStats = false;
-  private String getTorDirectory = "in/gettor/";
   private String exoneraTorDatabaseJdbc = "jdbc:postgresql:"
       + "//localhost/exonerator?user=metrics&password=password";
   private String exoneraTorImportDirectory = "exonerator-import/";
@@ -87,11 +85,6 @@ public class Configuration {
               line.split(" ")[1]) != 0;
         } else if (line.startsWith("TorperfDirectory")) {
           this.torperfDirectory = line.split(" ")[1];
-        } else if (line.startsWith("ProcessGetTorStats")) {
-          this.processGetTorStats = Integer.parseInt(
-              line.split(" ")[1]) != 0;
-        } else if (line.startsWith("GetTorDirectory")) {
-          this.getTorDirectory = line.split(" ")[1];
         } else if (line.startsWith("ExoneraTorDatabaseJdbc")) {
           this.exoneraTorDatabaseJdbc = line.split(" ")[1];
         } else if (line.startsWith("ExoneraTorImportDirectory")) {
@@ -160,12 +153,6 @@ public class Configuration {
   public String getTorperfDirectory() {
     return this.torperfDirectory;
   }
-  public boolean getProcessGetTorStats() {
-    return this.processGetTorStats;
-  }
-  public String getGetTorDirectory() {
-    return this.getTorDirectory;
-  }
   public String getExoneraTorDatabaseJdbc() {
     return this.exoneraTorDatabaseJdbc;
   }
diff --git a/src/org/torproject/ernie/cron/GetTorProcessor.java b/src/org/torproject/ernie/cron/GetTorProcessor.java
deleted file mode 100644
index d561784..0000000
--- a/src/org/torproject/ernie/cron/GetTorProcessor.java
+++ /dev/null
@@ -1,152 +0,0 @@
-/* Copyright 2011, 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.cron;
-
-import java.io.File;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.text.SimpleDateFormat;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.GetTorStatistics;
-
-public class GetTorProcessor {
-  public GetTorProcessor(File getTorDirectory, String connectionURL) {
-
-    Logger logger = Logger.getLogger(GetTorProcessor.class.getName());
-
-    /* Parse stats file. */
-    File getTorFile = new File(getTorDirectory, "gettor_stats.txt");
-    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-    if (!getTorFile.exists() || getTorFile.isDirectory()) {
-      logger.warning("Could not read GetTor stats");
-      return;
-    }
-    SortedSet<String> columns = new TreeSet<String>();
-    SortedMap<String, Map<String, Integer>> data =
-        new TreeMap<String, Map<String, Integer>>();
-
-    logger.fine("Importing GetTor stats files in directory "
-        + getTorDirectory + "/...");
-    DescriptorReader reader =
-        DescriptorSourceFactory.createDescriptorReader();
-    reader.addDirectory(getTorDirectory);
-    Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
-    while (descriptorFiles.hasNext()) {
-      DescriptorFile descriptorFile = descriptorFiles.next();
-      if (descriptorFile.getException() != null) {
-        logger.log(Level.WARNING, "Could not parse descriptor file '"
-            + descriptorFile.getFileName() + "'.  Skipping.",
-            descriptorFile.getException());
-        continue;
-      }
-      if (descriptorFile.getDescriptors() != null) {
-        for (Descriptor descriptor : descriptorFile.getDescriptors()) {
-          if (!(descriptor instanceof GetTorStatistics)) {
-            continue;
-          }
-          GetTorStatistics stats = (GetTorStatistics) descriptor;
-          String date = dateFormat.format(stats.getDateMillis());
-          Map<String, Integer> obs = new HashMap<String, Integer>();
-          for (Map.Entry<String, Integer> e :
-              stats.getDownloadedPackages().entrySet()) {
-            columns.add(e.getKey().toLowerCase());
-            obs.put(e.getKey().toLowerCase(), e.getValue());
-          }
-          data.put(date, obs);
-        }
-      }
-    }
-
-    /* Write results to database. */
-    if (connectionURL != null) {
-      try {
-        Map<String, Integer> updateRows = new HashMap<String, Integer>(),
-            insertRows = new HashMap<String, Integer>();
-        for (Map.Entry<String, Map<String, Integer>> e :
-            data.entrySet()) {
-          String date = e.getKey();
-          Map<String, Integer> obs = e.getValue();
-          for (String column : columns) {
-            if (obs.containsKey(column)) {
-              Integer value = obs.get(column);
-              String key = date + "," + column;
-              insertRows.put(key, value);
-            }
-          }
-        }
-        Connection conn = DriverManager.getConnection(connectionURL);
-        PreparedStatement psI = conn.prepareStatement(
-            "INSERT INTO gettor_stats (downloads, date, bundle) "
-            + "VALUES (?, ?, ?)");
-        PreparedStatement psU = conn.prepareStatement(
-            "UPDATE gettor_stats SET downloads = ? "
-            + "WHERE date = ? AND bundle = ?");
-        conn.setAutoCommit(false);
-        Statement statement = conn.createStatement();
-        ResultSet rs = statement.executeQuery(
-            "SELECT date, bundle, downloads FROM gettor_stats");
-        while (rs.next()) {
-          String date = rs.getDate(1).toString();
-          String bundle = rs.getString(2);
-          String key = date + "," + bundle;
-          if (insertRows.containsKey(key)) {
-            int insertRow = insertRows.remove(key);
-            int oldCount = rs.getInt(3);
-            if (insertRow != oldCount) {
-              updateRows.put(key, insertRow);
-            }
-          }
-        }
-        for (Map.Entry<String, Integer> e : updateRows.entrySet()) {
-          String[] keyParts = e.getKey().split(",");
-          java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
-          String bundle = keyParts[1];
-          int downloads = e.getValue();
-          psU.clearParameters();
-          psU.setLong(1, downloads);
-          psU.setDate(2, date);
-          psU.setString(3, bundle);
-          psU.executeUpdate();
-        }
-        for (Map.Entry<String, Integer> e : insertRows.entrySet()) {
-          String[] keyParts = e.getKey().split(",");
-          java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
-          String bundle = keyParts[1];
-          int downloads = e.getValue();
-          psI.clearParameters();
-          psI.setLong(1, downloads);
-          psI.setDate(2, date);
-          psI.setString(3, bundle);
-          psI.executeUpdate();
-        }
-        conn.commit();
-        conn.close();
-      } catch (SQLException e) {
-        logger.log(Level.WARNING, "Failed to add GetTor stats to "
-            + "database.", e);
-      }
-    }
-
-    logger.info("Finished processing statistics on Tor packages "
-        + "delivered by GetTor.\nLast date in statistics is "
-        + (data.isEmpty() ? "(null)" : data.lastKey()) + ".");
-  }
-}
-
diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java
index 8022fcc..6c98602 100644
--- a/src/org/torproject/ernie/cron/Main.java
+++ b/src/org/torproject/ernie/cron/Main.java
@@ -84,13 +84,6 @@ public class Main {
           statsDirectory, config.getRelayDescriptorDatabaseJDBC());
     }
 
-    // Download and process GetTor stats
-    if (config.getProcessGetTorStats()) {
-      new GetTorProcessor(
-          new File(config.getGetTorDirectory()),
-          config.getRelayDescriptorDatabaseJDBC());
-    }
-
     // Remove lock file
     lf.releaseLock();
 
diff --git a/src/org/torproject/ernie/web/GraphDataServlet.java b/src/org/torproject/ernie/web/GraphDataServlet.java
index 5b79c1b..11800d7 100644
--- a/src/org/torproject/ernie/web/GraphDataServlet.java
+++ b/src/org/torproject/ernie/web/GraphDataServlet.java
@@ -68,7 +68,6 @@ public class GraphDataServlet extends HttpServlet {
         "direct-users");
     this.availableGraphDataFiles.put("bridge-users-by-country",
         "bridge-users");
-    this.availableGraphDataFiles.put("gettor", "gettor");
     this.availableGraphDataFiles.put("torperf", "torperf");
 
     /* Initialize map of graphs with specific variable columns. */
diff --git a/src/org/torproject/ernie/web/RObjectGenerator.java b/src/org/torproject/ernie/web/RObjectGenerator.java
index 3a856c6..d8517c1 100644
--- a/src/org/torproject/ernie/web/RObjectGenerator.java
+++ b/src/org/torproject/ernie/web/RObjectGenerator.java
@@ -61,7 +61,6 @@ public class RObjectGenerator implements ServletContextListener {
     this.availableCsvFiles.add("direct-users");
     this.availableCsvFiles.add("dirreq-stats");
     this.availableCsvFiles.add("dirbytes");
-    this.availableCsvFiles.add("gettor");
     this.availableCsvFiles.add("monthly-users-average");
     this.availableCsvFiles.add("monthly-users-peak");
     this.availableCsvFiles.add("networksize");
@@ -95,7 +94,6 @@ public class RObjectGenerator implements ServletContextListener {
         "start,end,country,events,filename,nocutoff,dpi");
     this.availableGraphs.put("bridge-users",
          "start,end,country,filename,dpi");
-    this.availableGraphs.put("gettor", "start,end,language,filename,dpi");
     this.availableGraphs.put("torperf",
          "start,end,source,filesize,filename,dpi");
     this.availableGraphs.put("torperf-failures",





More information about the tor-commits mailing list