[tor-commits] [metrics-web/master] Remove bridge descriptor parser.

karsten at torproject.org karsten at torproject.org
Thu Mar 22 11:48:14 UTC 2012


commit b4d34b4cb2e90584597d297cddde9aa228ae1c66
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Mar 21 12:30:49 2012 +0100

    Remove bridge descriptor parser.
    
    The purpose of this commit is to prepare for adding metrics-lib as a
    library for descriptor parsing.
---
 .../ernie/cron/BridgeDescriptorParser.java         |  148 --------------------
 src/org/torproject/ernie/cron/Main.java            |    9 +-
 .../ernie/cron/SanitizedBridgesReader.java         |  145 ++++++++++++++++++-
 .../ernie/test/SanitizedBridgesReaderTest.java     |   33 -----
 4 files changed, 143 insertions(+), 192 deletions(-)

diff --git a/src/org/torproject/ernie/cron/BridgeDescriptorParser.java b/src/org/torproject/ernie/cron/BridgeDescriptorParser.java
deleted file mode 100644
index 221e896..0000000
--- a/src/org/torproject/ernie/cron/BridgeDescriptorParser.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/* Copyright 2011, 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.cron;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TimeZone;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.digest.DigestUtils;
-
-public class BridgeDescriptorParser {
-  private ConsensusStatsFileHandler csfh;
-  private BridgeStatsFileHandler bsfh;
-  private Logger logger;
-  public BridgeDescriptorParser(ConsensusStatsFileHandler csfh,
-      BridgeStatsFileHandler bsfh) {
-    this.csfh = csfh;
-    this.bsfh = bsfh;
-    this.logger =
-        Logger.getLogger(BridgeDescriptorParser.class.getName());
-  }
-  public void parse(byte[] allData, String dateTime, boolean sanitized) {
-    try {
-      BufferedReader br = new BufferedReader(new StringReader(
-          new String(allData, "US-ASCII")));
-      SimpleDateFormat timeFormat = new SimpleDateFormat(
-          "yyyy-MM-dd HH:mm:ss");
-      timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-      String hashedIdentity = null, platformLine = null,
-          publishedLine = null, geoipStartTimeLine = null,
-          bridgeStatsEndLine = null;
-      boolean skip = false;
-      String line = null;
-      while ((line = br.readLine()) != null) {
-        if (line.startsWith("r ")) {
-          int runningBridges = 0;
-          while ((line = br.readLine()) != null) {
-            if (line.startsWith("s ") && line.contains(" Running")) {
-              runningBridges++;
-            }
-          }
-          if (this.csfh != null) {
-            this.csfh.addBridgeConsensusResults(dateTime, runningBridges);
-          }
-        } else if (line.startsWith("router ")) {
-        } else if (line.startsWith("extra-info ")) {
-          hashedIdentity = sanitized ? line.split(" ")[2]
-              : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase();
-          if (this.bsfh != null) {
-            skip = this.bsfh.isKnownRelay(hashedIdentity);
-          }
-        } else if (!skip && line.startsWith("platform ")) {
-          platformLine = line;
-        } else if (!skip && line.startsWith("published ")) {
-          publishedLine = line;
-        } else if (line.startsWith("opt fingerprint") ||
-            line.startsWith("fingerprint")) {
-          String identity = line.substring(line.startsWith("opt ") ?
-              "opt fingerprint".length() : "fingerprint".length()).
-              replaceAll(" ", "").toLowerCase();
-          hashedIdentity = sanitized ? identity
-              : DigestUtils.shaHex(identity).toUpperCase();
-        } else if (!skip && line.startsWith("geoip-start-time ")) {
-          geoipStartTimeLine = line;
-        } else if (!skip && line.startsWith("geoip-client-origins")
-            && line.split(" ").length > 1) {
-          if (publishedLine == null ||
-              geoipStartTimeLine == null) {
-            this.logger.warning("Either published line or "
-                + "geoip-start-time line is not present in "
-                + (sanitized ? "sanitized" : "non-sanitized")
-                + " bridge descriptors from " + dateTime + ".");
-            break;
-          }
-          long published = timeFormat.parse(publishedLine.
-              substring("published ".length())).getTime();
-          long started = timeFormat.parse(geoipStartTimeLine.
-              substring("geoip-start-time ".length())).getTime();
-          long seconds = (published - started) / 1000L;
-          double allUsers = 0.0D;
-          Map<String, String> obs = new HashMap<String, String>();
-          String[] parts = line.split(" ")[1].split(",");
-          for (String p : parts) {
-            String country = p.substring(0, 2);
-            double users = ((double) Long.parseLong(p.substring(3)) - 4L)
-                    * 86400.0D / ((double) seconds);
-            allUsers += users;
-            obs.put(country, String.format("%.2f", users));
-          }
-          obs.put("zy", String.format("%.2f", allUsers));
-          String date = publishedLine.split(" ")[1];
-          String time = publishedLine.split(" ")[2];
-          if (this.bsfh != null) {
-            this.bsfh.addObs(hashedIdentity, date, time, obs);
-          }
-        } else if (!skip && line.startsWith("bridge-stats-end ")) {
-          bridgeStatsEndLine = line;
-        } else if (!skip && line.startsWith("bridge-ips")
-            && line.split(" ").length > 1) {
-          if (bridgeStatsEndLine == null) {
-            this.logger.warning("bridge-ips line without preceding "
-                + "bridge-stats-end line in "
-                + (sanitized ? "sanitized" : "non-sanitized")
-                + " bridge descriptor.");
-            break;
-          }
-          double allUsers = 0.0D;
-          Map<String, String> obs = new HashMap<String, String>();
-          String[] parts = line.split(" ")[1].split(",");
-          for (String p : parts) {
-            String country = p.substring(0, 2);
-            double users = (double) Long.parseLong(p.substring(3)) - 4L;
-            allUsers += users;
-            obs.put(country, String.format("%.2f", users));
-          }
-          obs.put("zy", String.format("%.2f", allUsers));
-          String date = bridgeStatsEndLine.split(" ")[1];
-          String time = bridgeStatsEndLine.split(" ")[2];
-          if (this.bsfh != null) {
-            this.bsfh.addObs(hashedIdentity, date, time, obs);
-          }
-        }
-      }
-      if (this.bsfh != null && platformLine != null &&
-          platformLine.startsWith("platform Tor 0.2.2")) {
-        String date = publishedLine.split(" ")[1];
-        String time = publishedLine.split(" ")[2];
-        this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time);
-      }
-    } catch (IOException e) {
-      this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
-          e);
-      return;
-    } catch (ParseException e) {
-      this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
-          e);
-      return;
-    }
-  }
-}
-
diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java
index d631a7b..1c8ca48 100644
--- a/src/org/torproject/ernie/cron/Main.java
+++ b/src/org/torproject/ernie/cron/Main.java
@@ -68,13 +68,10 @@ public class Main {
         new ConsensusStatsFileHandler(
         config.getRelayDescriptorDatabaseJDBC()) : null;
 
-    // Prepare bridge descriptor parser
-    BridgeDescriptorParser bdp = config.getWriteBridgeStats() ?
-        new BridgeDescriptorParser(csfh, bsfh) : null;
-
     // Import bridge descriptors
-    if (bdp != null && config.getImportSanitizedBridges()) {
-      new SanitizedBridgesReader(bdp,
+    if (config.getWriteBridgeStats() &&
+        config.getImportSanitizedBridges()) {
+      new SanitizedBridgesReader(csfh, bsfh,
           new File(config.getSanitizedBridgesDirectory()),
           statsDirectory, config.getKeepSanitizedBridgesImportHistory());
     }
diff --git a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
index 09b4983..78bd7db 100644
--- a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
+++ b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
@@ -11,24 +11,40 @@ import java.io.FileInputStream;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.SortedSet;
 import java.util.Stack;
+import java.util.TimeZone;
 import java.util.TreeSet;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+import org.apache.commons.codec.digest.DigestUtils;
+
 public class SanitizedBridgesReader {
-  public SanitizedBridgesReader(BridgeDescriptorParser bdp,
-      File bridgesDir, File statsDirectory, boolean keepImportHistory) {
+  private ConsensusStatsFileHandler csfh;
+  private BridgeStatsFileHandler bsfh;
+  private Logger logger;
+  public SanitizedBridgesReader(ConsensusStatsFileHandler csfh,
+      BridgeStatsFileHandler bsfh, File bridgesDir, File statsDirectory,
+      boolean keepImportHistory) {
 
-    if (bdp == null || bridgesDir == null || statsDirectory == null) {
+    if (csfh == null || bsfh == null || bridgesDir == null ||
+        statsDirectory == null) {
       throw new IllegalArgumentException();
     }
 
-    Logger logger =
+    this.csfh = csfh;
+    this.bsfh = bsfh;
+    this.logger =
         Logger.getLogger(SanitizedBridgesReader.class.getName());
+
     SortedSet<String> bridgesImportHistory = new TreeSet<String>();
     File bridgesImportHistoryFile =
         new File(statsDirectory, "bridges-import-history");
@@ -79,7 +95,7 @@ public class SanitizedBridgesReader {
             String dateTime = fn.substring(0, 4) + "-" + fn.substring(4, 6)
                 + "-" + fn.substring(6, 8) + " " + fn.substring(9, 11)
                 + ":" + fn.substring(11, 13) + ":" + fn.substring(13, 15);
-            bdp.parse(allData, dateTime, true);
+            this.parse(allData, dateTime, true);
             if (keepImportHistory) {
               bridgesImportHistory.add(pop.getName());
             }
@@ -123,5 +139,124 @@ public class SanitizedBridgesReader {
       }
     }
   }
+
+  private void parse(byte[] allData, String dateTime, boolean sanitized) {
+    try {
+      BufferedReader br = new BufferedReader(new StringReader(
+          new String(allData, "US-ASCII")));
+      SimpleDateFormat timeFormat = new SimpleDateFormat(
+          "yyyy-MM-dd HH:mm:ss");
+      timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      String hashedIdentity = null, platformLine = null,
+          publishedLine = null, geoipStartTimeLine = null,
+          bridgeStatsEndLine = null;
+      boolean skip = false;
+      String line = null;
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith("r ")) {
+          int runningBridges = 0;
+          while ((line = br.readLine()) != null) {
+            if (line.startsWith("s ") && line.contains(" Running")) {
+              runningBridges++;
+            }
+          }
+          if (this.csfh != null) {
+            this.csfh.addBridgeConsensusResults(dateTime, runningBridges);
+          }
+        } else if (line.startsWith("router ")) {
+        } else if (line.startsWith("extra-info ")) {
+          hashedIdentity = sanitized ? line.split(" ")[2]
+              : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase();
+          if (this.bsfh != null) {
+            skip = this.bsfh.isKnownRelay(hashedIdentity);
+          }
+        } else if (!skip && line.startsWith("platform ")) {
+          platformLine = line;
+        } else if (!skip && line.startsWith("published ")) {
+          publishedLine = line;
+        } else if (line.startsWith("opt fingerprint") ||
+            line.startsWith("fingerprint")) {
+          String identity = line.substring(line.startsWith("opt ") ?
+              "opt fingerprint".length() : "fingerprint".length()).
+              replaceAll(" ", "").toLowerCase();
+          hashedIdentity = sanitized ? identity
+              : DigestUtils.shaHex(identity).toUpperCase();
+        } else if (!skip && line.startsWith("geoip-start-time ")) {
+          geoipStartTimeLine = line;
+        } else if (!skip && line.startsWith("geoip-client-origins")
+            && line.split(" ").length > 1) {
+          if (publishedLine == null ||
+              geoipStartTimeLine == null) {
+            this.logger.warning("Either published line or "
+                + "geoip-start-time line is not present in "
+                + (sanitized ? "sanitized" : "non-sanitized")
+                + " bridge descriptors from " + dateTime + ".");
+            break;
+          }
+          long published = timeFormat.parse(publishedLine.
+              substring("published ".length())).getTime();
+          long started = timeFormat.parse(geoipStartTimeLine.
+              substring("geoip-start-time ".length())).getTime();
+          long seconds = (published - started) / 1000L;
+          double allUsers = 0.0D;
+          Map<String, String> obs = new HashMap<String, String>();
+          String[] parts = line.split(" ")[1].split(",");
+          for (String p : parts) {
+            String country = p.substring(0, 2);
+            double users = ((double) Long.parseLong(p.substring(3)) - 4L)
+                    * 86400.0D / ((double) seconds);
+            allUsers += users;
+            obs.put(country, String.format("%.2f", users));
+          }
+          obs.put("zy", String.format("%.2f", allUsers));
+          String date = publishedLine.split(" ")[1];
+          String time = publishedLine.split(" ")[2];
+          if (this.bsfh != null) {
+            this.bsfh.addObs(hashedIdentity, date, time, obs);
+          }
+        } else if (!skip && line.startsWith("bridge-stats-end ")) {
+          bridgeStatsEndLine = line;
+        } else if (!skip && line.startsWith("bridge-ips")
+            && line.split(" ").length > 1) {
+          if (bridgeStatsEndLine == null) {
+            this.logger.warning("bridge-ips line without preceding "
+                + "bridge-stats-end line in "
+                + (sanitized ? "sanitized" : "non-sanitized")
+                + " bridge descriptor.");
+            break;
+          }
+          double allUsers = 0.0D;
+          Map<String, String> obs = new HashMap<String, String>();
+          String[] parts = line.split(" ")[1].split(",");
+          for (String p : parts) {
+            String country = p.substring(0, 2);
+            double users = (double) Long.parseLong(p.substring(3)) - 4L;
+            allUsers += users;
+            obs.put(country, String.format("%.2f", users));
+          }
+          obs.put("zy", String.format("%.2f", allUsers));
+          String date = bridgeStatsEndLine.split(" ")[1];
+          String time = bridgeStatsEndLine.split(" ")[2];
+          if (this.bsfh != null) {
+            this.bsfh.addObs(hashedIdentity, date, time, obs);
+          }
+        }
+      }
+      if (this.bsfh != null && platformLine != null &&
+          platformLine.startsWith("platform Tor 0.2.2")) {
+        String date = publishedLine.split(" ")[1];
+        String time = publishedLine.split(" ")[2];
+        this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time);
+      }
+    } catch (IOException e) {
+      this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
+          e);
+      return;
+    } catch (ParseException e) {
+      this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
+          e);
+      return;
+    }
+  }
 }
 
diff --git a/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java b/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java
deleted file mode 100644
index 748894e..0000000
--- a/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Copyright 2011, 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.test;
-
-import java.io.File;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-import org.torproject.ernie.cron.SanitizedBridgesReader;
-
-public class SanitizedBridgesReaderTest {
-
-  private File tempSanitizedBridgesDirectory;
-  private File tempStatsDirectory;
-
-  @Rule
-  public TemporaryFolder folder = new TemporaryFolder();
-
-  @Before
-  public void createTempDirectories() {
-    this.tempSanitizedBridgesDirectory = folder.newFolder("bridges");
-    this.tempStatsDirectory = folder.newFolder("stats");
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testBridgeDescriptorParserNull() {
-    new SanitizedBridgesReader(null, this.tempSanitizedBridgesDirectory,
-        this.tempStatsDirectory, false);
-  }
-}
-





More information about the tor-commits mailing list