[tor-commits] [metrics-web/master] Stop generating servers.csv in legacy module.

karsten at torproject.org
Thu Nov 22 08:03:10 UTC 2018


commit 6765b8ec054bfac54bff188087031e4cf6a46d64
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Nov 12 10:28:06 2018 +0100

    Stop generating servers.csv in legacy module.
    
    Also stop importing bridge network size statistics into the database.
    
    Required changes to existing legacy.config (removals):
    ImportSanitizedBridges
    SanitizedBridgesDirectory
    KeepSanitizedBridgesImportHistory
    WriteBridgeStats
    
    Required schema changes to live tordir databases:
    
    DROP VIEW stats_servers;
    CREATE OR REPLACE FUNCTION refresh_all() [...]
    DROP TABLE bridge_network_size;
    DROP FUNCTION refresh_relay_versions();
    DROP FUNCTION refresh_relay_platforms();
    DROP FUNCTION refresh_network_size();
    DROP TABLE relay_versions;
    DROP TABLE relay_platforms;
    DROP TABLE relay_countries;
    DROP TABLE network_size;
    
    Part of #28116.
---
 .../metrics/stats/servers/Configuration.java       |  35 --
 .../stats/servers/ConsensusStatsFileHandler.java   | 398 ---------------------
 .../org/torproject/metrics/stats/servers/Main.java |  17 -
 src/main/resources/legacy.config.template          |  15 -
 src/main/sql/legacy/tordir.sql                     | 284 ---------------
 5 files changed, 749 deletions(-)
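
The commit message above lists the exact statements operators need to run
against a live tordir database. As a rough illustration only (not part of
this commit), a one-off JDBC sketch could apply them in the order given;
the connection URL below is a placeholder, and the refresh_all()
replacement, whose body is elided in the message, still has to be
re-created from the updated src/main/sql/legacy/tordir.sql separately:

  import java.sql.Connection;
  import java.sql.DriverManager;
  import java.sql.SQLException;
  import java.sql.Statement;

  public class ApplySchemaChanges {
    public static void main(String[] args) throws SQLException {
      /* Placeholder JDBC URL; substitute the live database's settings. */
      String connectionUrl = args.length > 0 ? args[0]
          : "jdbc:postgresql://localhost/tordir";
      /* Statements copied from the commit message, in the order given
       * there; the stats_servers view is dropped before the tables it
       * reads from. */
      String[] schemaChanges = {
          "DROP VIEW stats_servers",
          "DROP TABLE bridge_network_size",
          "DROP FUNCTION refresh_relay_versions()",
          "DROP FUNCTION refresh_relay_platforms()",
          "DROP FUNCTION refresh_network_size()",
          "DROP TABLE relay_versions",
          "DROP TABLE relay_platforms",
          "DROP TABLE relay_countries",
          "DROP TABLE network_size" };
      try (Connection conn = DriverManager.getConnection(connectionUrl);
          Statement statement = conn.createStatement()) {
        for (String sql : schemaChanges) {
          statement.execute(sql);
        }
      }
    }
  }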

diff --git a/src/main/java/org/torproject/metrics/stats/servers/Configuration.java b/src/main/java/org/torproject/metrics/stats/servers/Configuration.java
index 8435b90..c4597bc 100644
--- a/src/main/java/org/torproject/metrics/stats/servers/Configuration.java
+++ b/src/main/java/org/torproject/metrics/stats/servers/Configuration.java
@@ -30,12 +30,6 @@ public class Configuration {
 
   private boolean keepDirectoryArchiveImportHistory = false;
 
-  private boolean importSanitizedBridges = false;
-
-  private String sanitizedBridgesDirectory = "in/bridge-descriptors/";
-
-  private boolean keepSanitizedBridgesImportHistory = false;
-
   private boolean writeRelayDescriptorDatabase = false;
 
   private String relayDescriptorDatabaseJdbc =
@@ -45,8 +39,6 @@ public class Configuration {
 
   private String relayDescriptorRawFilesDirectory = "pg-import/";
 
-  private boolean writeBridgeStats = false;
-
   /** Initializes this configuration class. */
   public Configuration() {
 
@@ -67,14 +59,6 @@ public class Configuration {
         } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
           this.keepDirectoryArchiveImportHistory = Integer.parseInt(
               line.split(" ")[1]) != 0;
-        } else if (line.startsWith("ImportSanitizedBridges")) {
-          this.importSanitizedBridges = Integer.parseInt(
-              line.split(" ")[1]) != 0;
-        } else if (line.startsWith("SanitizedBridgesDirectory")) {
-          this.sanitizedBridgesDirectory = line.split(" ")[1];
-        } else if (line.startsWith("KeepSanitizedBridgesImportHistory")) {
-          this.keepSanitizedBridgesImportHistory = Integer.parseInt(
-              line.split(" ")[1]) != 0;
         } else if (line.startsWith("WriteRelayDescriptorDatabase")) {
           this.writeRelayDescriptorDatabase = Integer.parseInt(
               line.split(" ")[1]) != 0;
@@ -85,9 +69,6 @@ public class Configuration {
               line.split(" ")[1]) != 0;
         } else if (line.startsWith("RelayDescriptorRawFilesDirectory")) {
           this.relayDescriptorRawFilesDirectory = line.split(" ")[1];
-        } else if (line.startsWith("WriteBridgeStats")) {
-          this.writeBridgeStats = Integer.parseInt(
-              line.split(" ")[1]) != 0;
         } else if (!line.startsWith("#") && line.length() > 0) {
           log.error("Configuration file contains unrecognized "
               + "configuration key in line '{}'! Exiting!", line);
@@ -136,18 +117,6 @@ public class Configuration {
     return this.writeRelayDescriptorDatabase;
   }
 
-  public boolean getImportSanitizedBridges() {
-    return this.importSanitizedBridges;
-  }
-
-  public String getSanitizedBridgesDirectory() {
-    return this.sanitizedBridgesDirectory;
-  }
-
-  public boolean getKeepSanitizedBridgesImportHistory() {
-    return this.keepSanitizedBridgesImportHistory;
-  }
-
   public String getRelayDescriptorDatabaseJdbc() {
     return this.relayDescriptorDatabaseJdbc;
   }
@@ -159,9 +128,5 @@ public class Configuration {
   public String getRelayDescriptorRawFilesDirectory() {
     return this.relayDescriptorRawFilesDirectory;
   }
-
-  public boolean getWriteBridgeStats() {
-    return this.writeBridgeStats;
-  }
 }
 
diff --git a/src/main/java/org/torproject/metrics/stats/servers/ConsensusStatsFileHandler.java b/src/main/java/org/torproject/metrics/stats/servers/ConsensusStatsFileHandler.java
deleted file mode 100644
index 960069c..0000000
--- a/src/main/java/org/torproject/metrics/stats/servers/ConsensusStatsFileHandler.java
+++ /dev/null
@@ -1,398 +0,0 @@
-/* Copyright 2011--2018 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.stats.servers;
-
-import org.torproject.descriptor.BridgeNetworkStatus;
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.NetworkStatusEntry;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TimeZone;
-import java.util.TreeMap;
-
-/**
- * Generates statistics on the average number of relays and bridges per
- * day. Accepts parse results from {@code RelayDescriptorParser} and
- * {@code BridgeDescriptorParser} and stores them in intermediate
- * result files {@code stats/consensus-stats-raw} and
- * {@code stats/bridge-consensus-stats-raw}. Writes final results to
- * {@code stats/consensus-stats} for all days for which at least half
- * of the expected consensuses or statuses are known.
- */
-public class ConsensusStatsFileHandler {
-
-  /**
-   * Intermediate results file holding the number of running bridges per
-   * bridge status.
-   */
-  private File bridgeConsensusStatsRawFile;
-
-  /**
-   * Number of running bridges in a given bridge status. Map keys are the bridge
-   * status time formatted as "yyyy-MM-dd HH:mm:ss", a comma, and the bridge
-   * authority nickname, map values are lines as read from
-   * {@code stats/bridge-consensus-stats-raw}.
-   */
-  private SortedMap<String, String> bridgesRaw;
-
-  /**
-   * Average number of running bridges per day. Map keys are dates
-   * formatted as "yyyy-MM-dd", map values are the remaining columns as written
-   * to {@code stats/consensus-stats}.
-   */
-  private SortedMap<String, String> bridgesPerDay;
-
-  private static Logger log = LoggerFactory.getLogger(
-      ConsensusStatsFileHandler.class);
-
-  private int bridgeResultsAdded = 0;
-
-  /* Database connection string. */
-  private String connectionUrl;
-
-  private SimpleDateFormat dateTimeFormat;
-
-  private File bridgesDir;
-
-  private File statsDirectory;
-
-  private boolean keepImportHistory;
-
-  /**
-   * Initializes this class, including reading in intermediate results
-   * files {@code stats/consensus-stats-raw} and
-   * {@code stats/bridge-consensus-stats-raw} and final results file
-   * {@code stats/consensus-stats}.
-   */
-  public ConsensusStatsFileHandler(String connectionUrl,
-      File bridgesDir, File statsDirectory,
-      boolean keepImportHistory) {
-
-    if (bridgesDir == null || statsDirectory == null) {
-      throw new IllegalArgumentException();
-    }
-    this.bridgesDir = bridgesDir;
-    this.statsDirectory = statsDirectory;
-    this.keepImportHistory = keepImportHistory;
-
-    /* Initialize local data structures to hold intermediate and final
-     * results. */
-    this.bridgesPerDay = new TreeMap<>();
-    this.bridgesRaw = new TreeMap<>();
-
-    /* Initialize file names for intermediate and final results files. */
-    this.bridgeConsensusStatsRawFile = new File(
-        "stats/bridge-consensus-stats-raw");
-
-    /* Initialize database connection string. */
-    this.connectionUrl = connectionUrl;
-
-    this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-    this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-
-    /* Read in number of running bridges per bridge status. */
-    if (this.bridgeConsensusStatsRawFile.exists()) {
-      log.debug("Reading file {}...",
-          this.bridgeConsensusStatsRawFile.getAbsolutePath());
-      try (BufferedReader br = new BufferedReader(new FileReader(
-          this.bridgeConsensusStatsRawFile))) {
-        String line;
-        while ((line = br.readLine()) != null) {
-          if (line.startsWith("date")) {
-            /* Skip headers. */
-            continue;
-          }
-          String[] parts = line.split(",");
-          if (parts.length < 2 || parts.length > 4) {
-            log.warn("Corrupt line '{}' in file {}! Aborting to read this "
-                + "file!", line,
-                this.bridgeConsensusStatsRawFile.getAbsolutePath());
-            break;
-          }
-          /* Assume that all lines without authority nickname are based on
-           * Tonga's network status, not Bifroest's. */
-          String key = parts[0] + "," + (parts.length < 4 ? "Tonga" : parts[1]);
-          String value = null;
-          if (parts.length == 2) {
-            value = key + "," + parts[1] + ",0";
-          } else if (parts.length == 3) {
-            value = key + "," + parts[1] + "," + parts[2];
-          } else if (parts.length == 4) {
-            value = key + "," + parts[2] + "," + parts[3];
-          } /* No more cases as we already checked the range above. */
-          this.bridgesRaw.put(key, value);
-        }
-        log.debug("Finished reading file {}.",
-            this.bridgeConsensusStatsRawFile.getAbsolutePath());
-      } catch (IOException e) {
-        log.warn("Failed to read file {}!",
-            this.bridgeConsensusStatsRawFile.getAbsolutePath(), e);
-      }
-    }
-  }
-
-  /**
-   * Adds the intermediate results of the number of running bridges in a
-   * given bridge status to the existing observations.
-   */
-  public void addBridgeConsensusResults(long publishedMillis,
-      String authorityNickname, int running, int runningEc2Bridges) {
-    String publishedAuthority = dateTimeFormat.format(publishedMillis) + ","
-        + authorityNickname;
-    String line = publishedAuthority + "," + running + "," + runningEc2Bridges;
-    if (!this.bridgesRaw.containsKey(publishedAuthority)) {
-      log.debug("Adding new bridge numbers: {}", line);
-      this.bridgesRaw.put(publishedAuthority, line);
-      this.bridgeResultsAdded++;
-    } else if (!line.equals(this.bridgesRaw.get(publishedAuthority))) {
-      log.warn("The numbers of running bridges we were just given ({}) are "
-          + "different from what we learned before ({})! Overwriting!", line,
-          this.bridgesRaw.get(publishedAuthority));
-      this.bridgesRaw.put(publishedAuthority, line);
-    }
-  }
-
-  /** Imports sanitized bridge descriptors. */
-  public void importSanitizedBridges() {
-    if (bridgesDir.exists()) {
-      log.debug("Importing files in directory {}/...", bridgesDir);
-      DescriptorReader reader =
-          DescriptorSourceFactory.createDescriptorReader();
-      File historyFile = new File(statsDirectory,
-          "consensus-stats-bridge-descriptor-history");
-      if (keepImportHistory) {
-        reader.setHistoryFile(historyFile);
-      }
-      for (Descriptor descriptor : reader.readDescriptors(bridgesDir)) {
-        if (descriptor instanceof BridgeNetworkStatus) {
-          String descriptorFileName = descriptor.getDescriptorFile().getName();
-          String authority = null;
-          if (descriptorFileName.contains(
-              "4A0CCD2DDC7995083D73F5D667100C8A5831F16D")) {
-            authority = "Tonga";
-          } else if (descriptorFileName.contains(
-              "1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1")) {
-            authority = "Bifroest";
-          } else if (descriptorFileName.contains(
-              "BA44A889E64B93FAA2B114E02C2A279A8555C533")) {
-            authority = "Serge";
-          }
-          if (authority == null) {
-            log.warn("Did not recognize the bridge authority that generated "
-                + "{}. Skipping.", descriptorFileName);
-            continue;
-          }
-          this.addBridgeNetworkStatus(
-              (BridgeNetworkStatus) descriptor, authority);
-        }
-      }
-      if (keepImportHistory) {
-        reader.saveHistoryFile(historyFile);
-      }
-      log.info("Finished importing bridge descriptors.");
-    }
-  }
-
-  private void addBridgeNetworkStatus(BridgeNetworkStatus status,
-      String authority) {
-    int runningBridges = 0;
-    int runningEc2Bridges = 0;
-    for (NetworkStatusEntry statusEntry
-        : status.getStatusEntries().values()) {
-      if (statusEntry.getFlags().contains("Running")) {
-        runningBridges++;
-        if (statusEntry.getNickname().startsWith("ec2bridge")) {
-          runningEc2Bridges++;
-        }
-      }
-    }
-    this.addBridgeConsensusResults(status.getPublishedMillis(), authority,
-        runningBridges, runningEc2Bridges);
-  }
-
-  /**
-   * Aggregates the raw observations on relay and bridge numbers and
-   * writes both raw and aggregate observations to disk.
-   */
-  public void writeFiles() {
-
-    /* Go through raw observations and put everything into nested maps by day
-     * and bridge authority. */
-    Map<String, Map<String, int[]>> bridgesPerDayAndAuthority = new HashMap<>();
-    for (String bridgesRawLine : this.bridgesRaw.values()) {
-      String[] parts = bridgesRawLine.split(",");
-      int brunning = Integer.parseInt(parts[2]);
-      if (brunning <= 0) {
-        /* Skip this status which contains zero bridges with the Running
-         * flag. */
-        continue;
-      }
-      String date = bridgesRawLine.substring(0, 10);
-      bridgesPerDayAndAuthority.putIfAbsent(date, new TreeMap<>());
-      String authority = parts[1];
-      bridgesPerDayAndAuthority.get(date).putIfAbsent(authority, new int[3]);
-      int[] bridges = bridgesPerDayAndAuthority.get(date).get(authority);
-      bridges[0] += brunning;
-      bridges[1] += Integer.parseInt(parts[3]);
-      bridges[2]++;
-    }
-
-    /* Sum up average numbers of running bridges per day reported by all bridge
-     * authorities and add these averages to final results. */
-    for (Map.Entry<String, Map<String, int[]>> perDay
-        : bridgesPerDayAndAuthority.entrySet()) {
-      String date = perDay.getKey();
-      int brunning = 0;
-      int brunningEc2 = 0;
-      for (int[] perAuthority : perDay.getValue().values()) {
-        int statuses = perAuthority[2];
-        if (statuses < 12) {
-          /* Only write results if we have seen at least a dozen statuses. */
-          continue;
-        }
-        brunning += perAuthority[0] / statuses;
-        brunningEc2 += perAuthority[1] / statuses;
-      }
-      String line = "," + brunning + "," + brunningEc2;
-      /* Are our results new? */
-      if (!this.bridgesPerDay.containsKey(date)) {
-        log.debug("Adding new average bridge numbers: {}{}", date, line);
-        this.bridgesPerDay.put(date, line);
-      } else if (!line.equals(this.bridgesPerDay.get(date))) {
-        log.debug("Replacing existing average bridge numbers ({} with new "
-            + "numbers: {}", this.bridgesPerDay.get(date), line);
-        this.bridgesPerDay.put(date, line);
-      }
-    }
-
-    /* Write raw numbers of running bridges to disk. */
-    log.debug("Writing file {}...",
-        this.bridgeConsensusStatsRawFile.getAbsolutePath());
-    this.bridgeConsensusStatsRawFile.getParentFile().mkdirs();
-    try (BufferedWriter bw = new BufferedWriter(
-        new FileWriter(this.bridgeConsensusStatsRawFile))) {
-      bw.append("datetime,authority,brunning,brunningec2");
-      bw.newLine();
-      for (String line : this.bridgesRaw.values()) {
-        bw.append(line);
-        bw.newLine();
-      }
-      log.debug("Finished writing file {}.",
-          this.bridgeConsensusStatsRawFile.getAbsolutePath());
-    } catch (IOException e) {
-      log.warn("Failed to write file {}!",
-          this.bridgeConsensusStatsRawFile.getAbsolutePath(), e);
-    }
-
-    /* Add average number of bridges per day to the database. */
-    if (connectionUrl != null) {
-      try {
-        Map<String, String> updateRows = new HashMap<>();
-        Map<String, String> insertRows = new HashMap<>(this.bridgesPerDay);
-        Connection conn = DriverManager.getConnection(connectionUrl);
-        conn.setAutoCommit(false);
-        Statement statement = conn.createStatement();
-        ResultSet rs = statement.executeQuery(
-            "SELECT date, avg_running, avg_running_ec2 "
-            + "FROM bridge_network_size");
-        while (rs.next()) {
-          String date = rs.getDate(1).toString();
-          if (insertRows.containsKey(date)) {
-            String insertRow = insertRows.remove(date);
-            String[] parts = insertRow.substring(1).split(",");
-            long newAvgRunning = Long.parseLong(parts[0]);
-            long newAvgRunningEc2 = Long.parseLong(parts[1]);
-            long oldAvgRunning = rs.getLong(2);
-            long oldAvgRunningEc2 = rs.getLong(3);
-            if (newAvgRunning != oldAvgRunning
-                || newAvgRunningEc2 != oldAvgRunningEc2) {
-              updateRows.put(date, insertRow);
-            }
-          }
-        }
-        rs.close();
-        PreparedStatement psU = conn.prepareStatement(
-            "UPDATE bridge_network_size SET avg_running = ?, "
-            + "avg_running_ec2 = ? WHERE date = ?");
-        for (Map.Entry<String, String> e : updateRows.entrySet()) {
-          java.sql.Date date = java.sql.Date.valueOf(e.getKey());
-          String[] parts = e.getValue().substring(1).split(",");
-          long avgRunning = Long.parseLong(parts[0]);
-          long avgRunningEc2 = Long.parseLong(parts[1]);
-          psU.clearParameters();
-          psU.setLong(1, avgRunning);
-          psU.setLong(2, avgRunningEc2);
-          psU.setDate(3, date);
-          psU.executeUpdate();
-        }
-        PreparedStatement psI = conn.prepareStatement(
-            "INSERT INTO bridge_network_size (avg_running, "
-            + "avg_running_ec2, date) VALUES (?, ?, ?)");
-        for (Map.Entry<String, String> e : insertRows.entrySet()) {
-          java.sql.Date date = java.sql.Date.valueOf(e.getKey());
-          String[] parts = e.getValue().substring(1).split(",");
-          long avgRunning = Long.parseLong(parts[0]);
-          long avgRunningEc2 = Long.parseLong(parts[1]);
-          psI.clearParameters();
-          psI.setLong(1, avgRunning);
-          psI.setLong(2, avgRunningEc2);
-          psI.setDate(3, date);
-          psI.executeUpdate();
-        }
-        conn.commit();
-        conn.close();
-      } catch (SQLException e) {
-        log.warn("Failed to add average bridge numbers to database.", e);
-      }
-    }
-
-    /* Write stats. */
-    StringBuilder dumpStats = new StringBuilder("Finished writing "
-        + "statistics on bridge network statuses to disk.\nAdded "
-        + this.bridgeResultsAdded + " bridge network status(es) in this "
-        + "execution.");
-    long now = System.currentTimeMillis();
-    SimpleDateFormat dateTimeFormat =
-        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    if (this.bridgesRaw.isEmpty()) {
-      dumpStats.append("\nNo bridge status known yet.");
-    } else {
-      dumpStats.append("\nLast known bridge status was published ")
-          .append(this.bridgesRaw.lastKey()).append(".");
-      try {
-        if (now - 6L * 60L * 60L * 1000L > dateTimeFormat.parse(
-            this.bridgesRaw.lastKey()).getTime()) {
-          log.warn("Last known bridge status is more than 6 hours old: {}",
-              this.bridgesRaw.lastKey());
-        }
-      } catch (ParseException e) {
-        log.warn("Can't parse the timestamp? Reason: {}", e);
-      }
-    }
-    log.info(dumpStats.toString());
-  }
-}
-
diff --git a/src/main/java/org/torproject/metrics/stats/servers/Main.java b/src/main/java/org/torproject/metrics/stats/servers/Main.java
index 080b6e4..4d349bc 100644
--- a/src/main/java/org/torproject/metrics/stats/servers/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/servers/Main.java
@@ -54,23 +54,6 @@ public class Main {
       }
     }
 
-    // Prepare consensus stats file handler (used for stats on running
-    // bridges only)
-    ConsensusStatsFileHandler csfh = config.getWriteBridgeStats()
-        ? new ConsensusStatsFileHandler(
-        config.getRelayDescriptorDatabaseJdbc(),
-        new File(config.getSanitizedBridgesDirectory()),
-        statsDirectory, config.getKeepSanitizedBridgesImportHistory())
-        : null;
-
-    // Import sanitized bridges and write updated stats files to disk
-    if (csfh != null) {
-      if (config.getImportSanitizedBridges()) {
-        csfh.importSanitizedBridges();
-      }
-      csfh.writeFiles();
-    }
-
     // Remove lock file
     lf.releaseLock();
 
diff --git a/src/main/resources/legacy.config.template b/src/main/resources/legacy.config.template
index afa8f2d..5475c1e 100644
--- a/src/main/resources/legacy.config.template
+++ b/src/main/resources/legacy.config.template
@@ -12,18 +12,6 @@
 ## again, but it can be confusing to users who don't know about it.
 #KeepDirectoryArchiveImportHistory 0
 #
-## Import sanitized bridges from disk, if available
-#ImportSanitizedBridges 0
-#
-## Relative path to directory to import sanitized bridges from
-#SanitizedBridgesDirectory /srv/metrics.torproject.org/metrics/shared/in/recent/bridge-descriptors/
-#
-## Keep a history of imported sanitized bridge descriptors. This history
-## can be useful when importing from a changing data source to avoid
-## importing descriptors more than once, but it can be confusing to users
-## who don't know about it.
-#KeepSanitizedBridgesImportHistory 0
-#
 ## Write relay descriptors to the database
 #WriteRelayDescriptorDatabase 0
 #
@@ -38,6 +26,3 @@
 ## files will be overwritten!
 #RelayDescriptorRawFilesDirectory pg-import/
 #
-## Write bridge stats to disk
-#WriteBridgeStats 0
-#
diff --git a/src/main/sql/legacy/tordir.sql b/src/main/sql/legacy/tordir.sql
index 16e7166..f1d6767 100644
--- a/src/main/sql/legacy/tordir.sql
+++ b/src/main/sql/legacy/tordir.sql
@@ -104,53 +104,6 @@ CREATE TABLE consensus (
     CONSTRAINT consensus_pkey PRIMARY KEY (validafter)
 );
 
--- TABLE network_size
-CREATE TABLE network_size (
-    date DATE NOT NULL,
-    avg_running INTEGER NOT NULL,
-    avg_exit INTEGER NOT NULL,
-    avg_guard INTEGER NOT NULL,
-    avg_fast INTEGER NOT NULL,
-    avg_stable INTEGER NOT NULL,
-    avg_authority INTEGER NOT NULL,
-    avg_badexit INTEGER NOT NULL,
-    avg_baddirectory INTEGER NOT NULL,
-    avg_hsdir INTEGER NOT NULL,
-    avg_named INTEGER NOT NULL,
-    avg_unnamed INTEGER NOT NULL,
-    avg_valid INTEGER NOT NULL,
-    avg_v2dir INTEGER NOT NULL,
-    avg_v3dir INTEGER NOT NULL,
-    CONSTRAINT network_size_pkey PRIMARY KEY(date)
-);
-
--- TABLE relay_countries
-CREATE TABLE relay_countries (
-    date DATE NOT NULL,
-    country CHARACTER(2) NOT NULL,
-    relays INTEGER NOT NULL,
-    CONSTRAINT relay_countries_pkey PRIMARY KEY(date, country)
-);
-
--- TABLE relay_platforms
-CREATE TABLE relay_platforms (
-    date DATE NOT NULL,
-    avg_linux INTEGER NOT NULL,
-    avg_darwin INTEGER NOT NULL,
-    avg_bsd INTEGER NOT NULL,
-    avg_windows INTEGER NOT NULL,
-    avg_other INTEGER NOT NULL,
-    CONSTRAINT relay_platforms_pkey PRIMARY KEY(date)
-);
-
--- TABLE relay_versions
-CREATE TABLE relay_versions (
-    date DATE NOT NULL,
-    version CHARACTER(5) NOT NULL,
-    relays INTEGER NOT NULL,
-    CONSTRAINT relay_versions_pkey PRIMARY KEY(date, version)
-);
-
 -- TABLE bandwidth_flags
 CREATE TABLE bandwidth_flags (
     date DATE NOT NULL,
@@ -299,157 +252,6 @@ $$ LANGUAGE plpgsql;
 -- They find what new data has been entered or updated based on the
 -- updates table.
 
--- FUNCTION refresh_network_size()
-CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$
-    DECLARE
-        min_date TIMESTAMP WITHOUT TIME ZONE;
-        max_date TIMESTAMP WITHOUT TIME ZONE;
-    BEGIN
-
-    min_date := (SELECT MIN(date) FROM updates);
-    max_date := (SELECT MAX(date) + 1 FROM updates);
-
-    DELETE FROM network_size
-    WHERE date IN (SELECT date FROM updates);
-
-    EXECUTE '
-        INSERT INTO network_size
-        (date, avg_running, avg_exit, avg_guard, avg_fast, avg_stable,
-        avg_authority, avg_badexit, avg_baddirectory, avg_hsdir,
-        avg_named, avg_unnamed, avg_valid, avg_v2dir, avg_v3dir)
-        SELECT date,
-            isrunning / count AS avg_running,
-            isexit / count AS avg_exit,
-            isguard / count AS avg_guard,
-            isfast / count AS avg_fast,
-            isstable / count AS avg_stable,
-            isauthority / count as avg_authority,
-            isbadexit / count as avg_badexit,
-            isbaddirectory / count as avg_baddirectory,
-            ishsdir / count as avg_hsdir,
-            isnamed / count as avg_named,
-            isunnamed / count as avg_unnamed,
-            isvalid / count as avg_valid,
-            isv2dir / count as avg_v2dir,
-            isv3dir / count as avg_v3dir
-        FROM (
-            SELECT DATE(validafter) AS date,
-                COUNT(*) AS isrunning,
-                COUNT(NULLIF(isexit, FALSE)) AS isexit,
-                COUNT(NULLIF(isguard, FALSE)) AS isguard,
-                COUNT(NULLIF(isfast, FALSE)) AS isfast,
-                COUNT(NULLIF(isstable, FALSE)) AS isstable,
-                COUNT(NULLIF(isauthority, FALSE)) AS isauthority,
-                COUNT(NULLIF(isbadexit, FALSE)) AS isbadexit,
-                COUNT(NULLIF(isbaddirectory, FALSE)) AS isbaddirectory,
-                COUNT(NULLIF(ishsdir, FALSE)) AS ishsdir,
-                COUNT(NULLIF(isnamed, FALSE)) AS isnamed,
-                COUNT(NULLIF(isunnamed, FALSE)) AS isunnamed,
-                COUNT(NULLIF(isvalid, FALSE)) AS isvalid,
-                COUNT(NULLIF(isv2dir, FALSE)) AS isv2dir,
-                COUNT(NULLIF(isv3dir, FALSE)) AS isv3dir
-            FROM statusentry
-            WHERE isrunning = TRUE
-              AND validafter >= ''' || min_date || '''
-              AND validafter < ''' || max_date || '''
-              AND DATE(validafter) IN (SELECT date FROM updates)
-            GROUP BY DATE(validafter)
-            ) b
-        NATURAL JOIN relay_statuses_per_day';
-
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_relay_platforms()
-CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$
-    DECLARE
-        min_date TIMESTAMP WITHOUT TIME ZONE;
-        max_date TIMESTAMP WITHOUT TIME ZONE;
-    BEGIN
-
-    min_date := (SELECT MIN(date) FROM updates);
-    max_date := (SELECT MAX(date) + 1 FROM updates);
-
-    DELETE FROM relay_platforms
-    WHERE date IN (SELECT date FROM updates);
-
-    EXECUTE '
-    INSERT INTO relay_platforms
-    (date, avg_linux, avg_darwin, avg_bsd, avg_windows, avg_other)
-    SELECT date,
-        linux / count AS avg_linux,
-        darwin / count AS avg_darwin,
-        bsd / count AS avg_bsd,
-        windows / count AS avg_windows,
-        other / count AS avg_other
-    FROM (
-        SELECT DATE(validafter) AS date,
-            SUM(CASE WHEN platform LIKE ''%Linux%'' THEN 1 ELSE 0 END)
-                AS linux,
-            SUM(CASE WHEN platform LIKE ''%Darwin%'' THEN 1 ELSE 0 END)
-                AS darwin,
-            SUM(CASE WHEN platform LIKE ''%BSD%'' THEN 1 ELSE 0 END)
-                AS bsd,
-            SUM(CASE WHEN platform LIKE ''%Windows%'' THEN 1 ELSE 0 END)
-                AS windows,
-            SUM(CASE WHEN platform NOT LIKE ''%Windows%''
-                AND platform NOT LIKE ''%Darwin%''
-                AND platform NOT LIKE ''%BSD%''
-                AND platform NOT LIKE ''%Linux%'' THEN 1 ELSE 0 END)
-                AS other
-        FROM descriptor
-        RIGHT JOIN statusentry
-        ON statusentry.descriptor = descriptor.descriptor
-        WHERE isrunning = TRUE
-          AND validafter >= ''' || min_date || '''
-          AND validafter < ''' || max_date || '''
-          AND DATE(validafter) IN (SELECT date FROM updates)
-        GROUP BY DATE(validafter)
-        ) b
-    NATURAL JOIN relay_statuses_per_day';
-
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_relay_versions()
-CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$
-    DECLARE
-        min_date TIMESTAMP WITHOUT TIME ZONE;
-        max_date TIMESTAMP WITHOUT TIME ZONE;
-    BEGIN
-
-    min_date := (SELECT MIN(date) FROM updates);
-    max_date := (SELECT MAX(date) + 1 FROM updates);
-
-    DELETE FROM relay_versions
-    WHERE date IN (SELECT date FROM updates);
-
-    EXECUTE '
-    INSERT INTO relay_versions
-    (date, version, relays)
-    SELECT date, version, relays / count AS relays
-    FROM (
-        SELECT DATE(validafter),
-               CASE WHEN platform LIKE ''Tor 0._._%'' THEN
-               SUBSTRING(platform, 5, 5) ELSE ''Other'' END AS version,
-               COUNT(*) AS relays
-        FROM descriptor RIGHT JOIN statusentry
-        ON descriptor.descriptor = statusentry.descriptor
-        WHERE isrunning = TRUE
-              AND platform IS NOT NULL
-              AND validafter >= ''' || min_date || '''
-              AND validafter < ''' || max_date || '''
-              AND DATE(validafter) IN (SELECT date FROM updates)
-        GROUP BY 1, 2
-        ) b
-    NATURAL JOIN relay_statuses_per_day';
-
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
 CREATE OR REPLACE FUNCTION refresh_bandwidth_flags() RETURNS INTEGER AS $$
     DECLARE
         min_date TIMESTAMP WITHOUT TIME ZONE;
@@ -581,20 +383,6 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
   END;
 $$ LANGUAGE plpgsql;
 
--- non-relay statistics
--- The following tables contain pre-aggregated statistics that are not
--- based on relay descriptors or that are not yet derived from the relay
--- descriptors in the database.
-
--- TABLE bridge_network_size
--- Contains average number of running bridges.
-CREATE TABLE bridge_network_size (
-    "date" DATE NOT NULL,
-    avg_running INTEGER NOT NULL,
-    avg_running_ec2 INTEGER NOT NULL,
-    CONSTRAINT bridge_network_size_pkey PRIMARY KEY(date)
-);
-
 -- Refresh all statistics in the database.
 CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
   BEGIN
@@ -605,12 +393,6 @@ CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
     INSERT INTO updates SELECT * FROM scheduled_updates;
     RAISE NOTICE '% Refreshing relay statuses per day.', timeofday();
     PERFORM refresh_relay_statuses_per_day();
-    RAISE NOTICE '% Refreshing network size.', timeofday();
-    PERFORM refresh_network_size();
-    RAISE NOTICE '% Refreshing relay platforms.', timeofday();
-    PERFORM refresh_relay_platforms();
-    RAISE NOTICE '% Refreshing relay versions.', timeofday();
-    PERFORM refresh_relay_versions();
     RAISE NOTICE '% Refreshing total relay bandwidth.', timeofday();
     PERFORM refresh_bandwidth_flags();
     RAISE NOTICE '% Refreshing bandwidth history.', timeofday();
@@ -630,72 +412,6 @@ CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
   END;
 $$ LANGUAGE plpgsql;
 
--- View for exporting server statistics.
-CREATE VIEW stats_servers AS
-  (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
-  NULL AS platform, TRUE AS ec2bridge, NULL AS relays,
-  avg_running_ec2 AS bridges FROM bridge_network_size
-  WHERE date < current_date)
-UNION ALL
-  (SELECT COALESCE(network_size.date, bridge_network_size.date) AS date,
-  NULL AS flag, NULL AS country, NULL AS version, NULL AS platform,
-  NULL AS ec2bridge, network_size.avg_running AS relays,
-  bridge_network_size.avg_running AS bridges FROM network_size
-  FULL OUTER JOIN bridge_network_size
-  ON network_size.date = bridge_network_size.date
-  WHERE COALESCE(network_size.date, bridge_network_size.date) <
-  current_date)
-UNION ALL
-  (SELECT date, 'Exit' AS flag, NULL AS country, NULL AS version,
-  NULL AS platform, NULL AS ec2bridge, avg_exit AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date)
-UNION ALL
-  (SELECT date, 'Guard' AS flag, NULL AS country, NULL AS version,
-  NULL AS platform, NULL AS ec2bridge, avg_guard AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date)
-UNION ALL
-  (SELECT date, 'Fast' AS flag, NULL AS country, NULL AS version,
-  NULL AS platform, NULL AS ec2bridge, avg_fast AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date)
-UNION ALL
-  (SELECT date, 'Stable' AS flag, NULL AS country, NULL AS version,
-  NULL AS platform, NULL AS ec2bridge, avg_stable AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date)
-UNION ALL
-  (SELECT date, 'HSDir' AS flag, NULL AS country, NULL AS version,
-  NULL AS platform, NULL AS ec2bridge, avg_hsdir AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date)
-UNION ALL
-  (SELECT date, NULL AS flag, CASE WHEN country != 'zz' THEN country
-  ELSE '??' END AS country, NULL AS version, NULL AS platform,
-  NULL AS ec2bridge, relays, NULL AS bridges FROM relay_countries
-  WHERE date < current_date)
-UNION ALL
-  (SELECT date, NULL AS flag, NULL AS country, version, NULL AS platform,
-  NULL AS ec2bridge, relays, NULL AS bridges FROM relay_versions
-  WHERE date < current_date)
-UNION ALL
-  (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
-  'Linux' AS platform, NULL AS ec2bridge, avg_linux AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date)
-UNION ALL
-  (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
-  'Darwin' AS platform, NULL AS ec2bridge, avg_darwin AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date)
-UNION ALL
-  (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
-  'BSD' AS platform, NULL AS ec2bridge, avg_bsd AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date)
-UNION ALL
-  (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
-  'Windows' AS platform, NULL AS ec2bridge, avg_windows AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date)
-UNION ALL
-  (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
-  'Other' AS platform, NULL AS ec2bridge, avg_other AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date)
-ORDER BY date, flag, country, version, platform, ec2bridge;
-
 -- View for exporting bandwidth statistics.
 CREATE VIEW stats_bandwidth AS
   (SELECT COALESCE(bandwidth_flags.date, bwhist_flags.date) AS date,
