
commit b4d34b4cb2e90584597d297cddde9aa228ae1c66 Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Wed Mar 21 12:30:49 2012 +0100 Remove bridge descriptor parser. The purpose of this commit is to prepare adding metrics-lib as library for descriptor parsing. --- .../ernie/cron/BridgeDescriptorParser.java | 148 -------------------- src/org/torproject/ernie/cron/Main.java | 9 +- .../ernie/cron/SanitizedBridgesReader.java | 145 ++++++++++++++++++- .../ernie/test/SanitizedBridgesReaderTest.java | 33 ----- 4 files changed, 143 insertions(+), 192 deletions(-) diff --git a/src/org/torproject/ernie/cron/BridgeDescriptorParser.java b/src/org/torproject/ernie/cron/BridgeDescriptorParser.java deleted file mode 100644 index 221e896..0000000 --- a/src/org/torproject/ernie/cron/BridgeDescriptorParser.java +++ /dev/null @@ -1,148 +0,0 @@ -/* Copyright 2011, 2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.cron; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.StringReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.HashMap; -import java.util.Map; -import java.util.TimeZone; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.codec.digest.DigestUtils; - -public class BridgeDescriptorParser { - private ConsensusStatsFileHandler csfh; - private BridgeStatsFileHandler bsfh; - private Logger logger; - public BridgeDescriptorParser(ConsensusStatsFileHandler csfh, - BridgeStatsFileHandler bsfh) { - this.csfh = csfh; - this.bsfh = bsfh; - this.logger = - Logger.getLogger(BridgeDescriptorParser.class.getName()); - } - public void parse(byte[] allData, String dateTime, boolean sanitized) { - try { - BufferedReader br = new BufferedReader(new StringReader( - new String(allData, "US-ASCII"))); - SimpleDateFormat timeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - timeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String hashedIdentity = null, platformLine = null, - publishedLine = null, geoipStartTimeLine = null, - bridgeStatsEndLine = null; - boolean skip = false; - String line = null; - while ((line = br.readLine()) != null) { - if (line.startsWith("r ")) { - int runningBridges = 0; - while ((line = br.readLine()) != null) { - if (line.startsWith("s ") && line.contains(" Running")) { - runningBridges++; - } - } - if (this.csfh != null) { - this.csfh.addBridgeConsensusResults(dateTime, runningBridges); - } - } else if (line.startsWith("router ")) { - } else if (line.startsWith("extra-info ")) { - hashedIdentity = sanitized ? line.split(" ")[2] - : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase(); - if (this.bsfh != null) { - skip = this.bsfh.isKnownRelay(hashedIdentity); - } - } else if (!skip && line.startsWith("platform ")) { - platformLine = line; - } else if (!skip && line.startsWith("published ")) { - publishedLine = line; - } else if (line.startsWith("opt fingerprint") || - line.startsWith("fingerprint")) { - String identity = line.substring(line.startsWith("opt ") ? - "opt fingerprint".length() : "fingerprint".length()). - replaceAll(" ", "").toLowerCase(); - hashedIdentity = sanitized ? identity - : DigestUtils.shaHex(identity).toUpperCase(); - } else if (!skip && line.startsWith("geoip-start-time ")) { - geoipStartTimeLine = line; - } else if (!skip && line.startsWith("geoip-client-origins") - && line.split(" ").length > 1) { - if (publishedLine == null || - geoipStartTimeLine == null) { - this.logger.warning("Either published line or " - + "geoip-start-time line is not present in " - + (sanitized ? "sanitized" : "non-sanitized") - + " bridge descriptors from " + dateTime + "."); - break; - } - long published = timeFormat.parse(publishedLine. - substring("published ".length())).getTime(); - long started = timeFormat.parse(geoipStartTimeLine. - substring("geoip-start-time ".length())).getTime(); - long seconds = (published - started) / 1000L; - double allUsers = 0.0D; - Map<String, String> obs = new HashMap<String, String>(); - String[] parts = line.split(" ")[1].split(","); - for (String p : parts) { - String country = p.substring(0, 2); - double users = ((double) Long.parseLong(p.substring(3)) - 4L) - * 86400.0D / ((double) seconds); - allUsers += users; - obs.put(country, String.format("%.2f", users)); - } - obs.put("zy", String.format("%.2f", allUsers)); - String date = publishedLine.split(" ")[1]; - String time = publishedLine.split(" ")[2]; - if (this.bsfh != null) { - this.bsfh.addObs(hashedIdentity, date, time, obs); - } - } else if (!skip && line.startsWith("bridge-stats-end ")) { - bridgeStatsEndLine = line; - } else if (!skip && line.startsWith("bridge-ips") - && line.split(" ").length > 1) { - if (bridgeStatsEndLine == null) { - this.logger.warning("bridge-ips line without preceding " - + "bridge-stats-end line in " - + (sanitized ? "sanitized" : "non-sanitized") - + " bridge descriptor."); - break; - } - double allUsers = 0.0D; - Map<String, String> obs = new HashMap<String, String>(); - String[] parts = line.split(" ")[1].split(","); - for (String p : parts) { - String country = p.substring(0, 2); - double users = (double) Long.parseLong(p.substring(3)) - 4L; - allUsers += users; - obs.put(country, String.format("%.2f", users)); - } - obs.put("zy", String.format("%.2f", allUsers)); - String date = bridgeStatsEndLine.split(" ")[1]; - String time = bridgeStatsEndLine.split(" ")[2]; - if (this.bsfh != null) { - this.bsfh.addObs(hashedIdentity, date, time, obs); - } - } - } - if (this.bsfh != null && platformLine != null && - platformLine.startsWith("platform Tor 0.2.2")) { - String date = publishedLine.split(" ")[1]; - String time = publishedLine.split(" ")[2]; - this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time); - } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", - e); - return; - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", - e); - return; - } - } -} - diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java index d631a7b..1c8ca48 100644 --- a/src/org/torproject/ernie/cron/Main.java +++ b/src/org/torproject/ernie/cron/Main.java @@ -68,13 +68,10 @@ public class Main { new ConsensusStatsFileHandler( config.getRelayDescriptorDatabaseJDBC()) : null; - // Prepare bridge descriptor parser - BridgeDescriptorParser bdp = config.getWriteBridgeStats() ? - new BridgeDescriptorParser(csfh, bsfh) : null; - // Import bridge descriptors - if (bdp != null && config.getImportSanitizedBridges()) { - new SanitizedBridgesReader(bdp, + if (config.getWriteBridgeStats() && + config.getImportSanitizedBridges()) { + new SanitizedBridgesReader(csfh, bsfh, new File(config.getSanitizedBridgesDirectory()), statsDirectory, config.getKeepSanitizedBridgesImportHistory()); } diff --git a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java index 09b4983..78bd7db 100644 --- a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java +++ b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java @@ -11,24 +11,40 @@ import java.io.FileInputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.StringReader; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.SortedSet; import java.util.Stack; +import java.util.TimeZone; import java.util.TreeSet; import java.util.logging.Level; import java.util.logging.Logger; +import org.apache.commons.codec.digest.DigestUtils; + public class SanitizedBridgesReader { - public SanitizedBridgesReader(BridgeDescriptorParser bdp, - File bridgesDir, File statsDirectory, boolean keepImportHistory) { + private ConsensusStatsFileHandler csfh; + private BridgeStatsFileHandler bsfh; + private Logger logger; + public SanitizedBridgesReader(ConsensusStatsFileHandler csfh, + BridgeStatsFileHandler bsfh, File bridgesDir, File statsDirectory, + boolean keepImportHistory) { - if (bdp == null || bridgesDir == null || statsDirectory == null) { + if (csfh == null || bsfh == null || bridgesDir == null || + statsDirectory == null) { throw new IllegalArgumentException(); } - Logger logger = + this.csfh = csfh; + this.bsfh = bsfh; + this.logger = Logger.getLogger(SanitizedBridgesReader.class.getName()); + SortedSet<String> bridgesImportHistory = new TreeSet<String>(); File bridgesImportHistoryFile = new File(statsDirectory, "bridges-import-history"); @@ -79,7 +95,7 @@ public class SanitizedBridgesReader { String dateTime = fn.substring(0, 4) + "-" + fn.substring(4, 6) + "-" + fn.substring(6, 8) + " " + fn.substring(9, 11) + ":" + fn.substring(11, 13) + ":" + fn.substring(13, 15); - bdp.parse(allData, dateTime, true); + this.parse(allData, dateTime, true); if (keepImportHistory) { bridgesImportHistory.add(pop.getName()); } @@ -123,5 +139,124 @@ public class SanitizedBridgesReader { } } } + + private void parse(byte[] allData, String dateTime, boolean sanitized) { + try { + BufferedReader br = new BufferedReader(new StringReader( + new String(allData, "US-ASCII"))); + SimpleDateFormat timeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + timeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String hashedIdentity = null, platformLine = null, + publishedLine = null, geoipStartTimeLine = null, + bridgeStatsEndLine = null; + boolean skip = false; + String line = null; + while ((line = br.readLine()) != null) { + if (line.startsWith("r ")) { + int runningBridges = 0; + while ((line = br.readLine()) != null) { + if (line.startsWith("s ") && line.contains(" Running")) { + runningBridges++; + } + } + if (this.csfh != null) { + this.csfh.addBridgeConsensusResults(dateTime, runningBridges); + } + } else if (line.startsWith("router ")) { + } else if (line.startsWith("extra-info ")) { + hashedIdentity = sanitized ? line.split(" ")[2] + : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase(); + if (this.bsfh != null) { + skip = this.bsfh.isKnownRelay(hashedIdentity); + } + } else if (!skip && line.startsWith("platform ")) { + platformLine = line; + } else if (!skip && line.startsWith("published ")) { + publishedLine = line; + } else if (line.startsWith("opt fingerprint") || + line.startsWith("fingerprint")) { + String identity = line.substring(line.startsWith("opt ") ? + "opt fingerprint".length() : "fingerprint".length()). + replaceAll(" ", "").toLowerCase(); + hashedIdentity = sanitized ? identity + : DigestUtils.shaHex(identity).toUpperCase(); + } else if (!skip && line.startsWith("geoip-start-time ")) { + geoipStartTimeLine = line; + } else if (!skip && line.startsWith("geoip-client-origins") + && line.split(" ").length > 1) { + if (publishedLine == null || + geoipStartTimeLine == null) { + this.logger.warning("Either published line or " + + "geoip-start-time line is not present in " + + (sanitized ? "sanitized" : "non-sanitized") + + " bridge descriptors from " + dateTime + "."); + break; + } + long published = timeFormat.parse(publishedLine. + substring("published ".length())).getTime(); + long started = timeFormat.parse(geoipStartTimeLine. + substring("geoip-start-time ".length())).getTime(); + long seconds = (published - started) / 1000L; + double allUsers = 0.0D; + Map<String, String> obs = new HashMap<String, String>(); + String[] parts = line.split(" ")[1].split(","); + for (String p : parts) { + String country = p.substring(0, 2); + double users = ((double) Long.parseLong(p.substring(3)) - 4L) + * 86400.0D / ((double) seconds); + allUsers += users; + obs.put(country, String.format("%.2f", users)); + } + obs.put("zy", String.format("%.2f", allUsers)); + String date = publishedLine.split(" ")[1]; + String time = publishedLine.split(" ")[2]; + if (this.bsfh != null) { + this.bsfh.addObs(hashedIdentity, date, time, obs); + } + } else if (!skip && line.startsWith("bridge-stats-end ")) { + bridgeStatsEndLine = line; + } else if (!skip && line.startsWith("bridge-ips") + && line.split(" ").length > 1) { + if (bridgeStatsEndLine == null) { + this.logger.warning("bridge-ips line without preceding " + + "bridge-stats-end line in " + + (sanitized ? "sanitized" : "non-sanitized") + + " bridge descriptor."); + break; + } + double allUsers = 0.0D; + Map<String, String> obs = new HashMap<String, String>(); + String[] parts = line.split(" ")[1].split(","); + for (String p : parts) { + String country = p.substring(0, 2); + double users = (double) Long.parseLong(p.substring(3)) - 4L; + allUsers += users; + obs.put(country, String.format("%.2f", users)); + } + obs.put("zy", String.format("%.2f", allUsers)); + String date = bridgeStatsEndLine.split(" ")[1]; + String time = bridgeStatsEndLine.split(" ")[2]; + if (this.bsfh != null) { + this.bsfh.addObs(hashedIdentity, date, time, obs); + } + } + } + if (this.bsfh != null && platformLine != null && + platformLine.startsWith("platform Tor 0.2.2")) { + String date = publishedLine.split(" ")[1]; + String time = publishedLine.split(" ")[2]; + this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time); + } + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", + e); + return; + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", + e); + return; + } + } } diff --git a/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java b/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java deleted file mode 100644 index 748894e..0000000 --- a/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright 2011, 2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.test; - -import java.io.File; - -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.torproject.ernie.cron.SanitizedBridgesReader; - -public class SanitizedBridgesReaderTest { - - private File tempSanitizedBridgesDirectory; - private File tempStatsDirectory; - - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - @Before - public void createTempDirectories() { - this.tempSanitizedBridgesDirectory = folder.newFolder("bridges"); - this.tempStatsDirectory = folder.newFolder("stats"); - } - - @Test(expected = IllegalArgumentException.class) - public void testBridgeDescriptorParserNull() { - new SanitizedBridgesReader(null, this.tempSanitizedBridgesDirectory, - this.tempStatsDirectory, false); - } -} -