[metrics-web/master] Use metrics-lib for parsing bridge descriptors, too.

commit 333a9c497b8558d584af7e27d953f50ad800aa11 Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Wed Mar 21 16:24:23 2012 +0100 Use metrics-lib for parsing bridge descriptors, too. --- .../ernie/cron/BridgeStatsFileHandler.java | 27 ++- .../ernie/cron/ConsensusStatsFileHandler.java | 8 +- .../ernie/cron/SanitizedBridgesReader.java | 299 ++++++-------------- 3 files changed, 111 insertions(+), 223 deletions(-) diff --git a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java index 6e7f4eb..aba7804 100644 --- a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java +++ b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java @@ -14,12 +14,15 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.SortedSet; +import java.util.TimeZone; import java.util.TreeMap; import java.util.TreeSet; import java.util.logging.Level; @@ -95,6 +98,8 @@ public class BridgeStatsFileHandler { /* Database connection string. */ private String connectionURL = null; + private SimpleDateFormat dateTimeFormat; + /** * Initializes this class, including reading in intermediate results * files <code>stats/bridge-stats-raw</code> and @@ -122,6 +127,9 @@ public class BridgeStatsFileHandler { /* Initialize database connection string. */ this.connectionURL = connectionURL; + this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + /* Initialize logger. */ this.logger = Logger.getLogger( BridgeStatsFileHandler.class.getName()); @@ -175,7 +183,9 @@ public class BridgeStatsFileHandler { obs.put(headers[i], parts[i]); } } - this.addObs(hashedBridgeIdentity, date, time, obs); + long dateTimeMillis = dateTimeFormat.parse(date + " " + + time).getTime(); + this.addObs(hashedBridgeIdentity, dateTimeMillis, obs); } } } @@ -185,6 +195,9 @@ public class BridgeStatsFileHandler { } catch (IOException e) { this.logger.log(Level.WARNING, "Failed to read file " + this.bridgeStatsRawFile.getAbsolutePath() + "!", e); + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Failed to read file " + + this.bridgeStatsRawFile.getAbsolutePath() + "!", e); } } @@ -253,9 +266,10 @@ public class BridgeStatsFileHandler { * not included in the results, because stats are very likely broken. */ public void addZeroTwoTwoDescriptor(String hashedBridgeIdentity, - String date, String time) { - String value = hashedBridgeIdentity.toUpperCase() + "," + date + "," - + time; + long publishedMillis) { + String value = hashedBridgeIdentity.toUpperCase() + "," + + this.dateTimeFormat.format(publishedMillis). + replaceAll(" ", ","); if (!this.zeroTwoTwoDescriptors.contains(value)) { this.logger.finer("Adding new bridge 0.2.2.x extra-info " + "descriptor: " + value); @@ -281,11 +295,14 @@ public class BridgeStatsFileHandler { * bridge and day, we keep the one with the later publication time and * discard the other one. */ - public void addObs(String hashedIdentity, String date, String time, + public void addObs(String hashedIdentity, long publishedMillis, Map<String, String> obs) { for (String country : obs.keySet()) { this.countries.add(country); } + String dateTime = this.dateTimeFormat.format(publishedMillis); + String date = dateTime.split(" ")[0]; + String time = dateTime.split(" ")[1]; String shortKey = hashedIdentity + "," + date; String longKey = shortKey + "," + time; SortedMap<String, Map<String, String>> tailMap = diff --git a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java index 6c83b05..fccb29b 100644 --- a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java +++ b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java @@ -66,6 +66,8 @@ public class ConsensusStatsFileHandler { /* Database connection string. */ private String connectionURL = null; + private SimpleDateFormat dateTimeFormat; + /** * Initializes this class, including reading in intermediate results * files <code>stats/consensus-stats-raw</code> and @@ -86,6 +88,9 @@ public class ConsensusStatsFileHandler { /* Initialize database connection string. */ this.connectionURL = connectionURL; + this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + /* Initialize logger. */ this.logger = Logger.getLogger( ConsensusStatsFileHandler.class.getName()); @@ -128,7 +133,8 @@ public class ConsensusStatsFileHandler { * Adds the intermediate results of the number of running bridges in a * given bridge status to the existing observations. */ - public void addBridgeConsensusResults(String published, int running) { + public void addBridgeConsensusResults(long publishedMillis, int running) { + String published = dateTimeFormat.format(publishedMillis); String line = published + "," + running; if (!this.bridgesRaw.containsKey(published)) { this.logger.finer("Adding new bridge numbers: " + line); diff --git a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java index 78bd7db..c7199cd 100644 --- a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java +++ b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java @@ -2,30 +2,20 @@ * See LICENSE for licensing information */ package org.torproject.ernie.cron; -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; import java.io.File; -import java.io.FileInputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.StringReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; +import java.util.Iterator; import java.util.Map; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.codec.digest.DigestUtils; +import org.torproject.descriptor.BridgeNetworkStatus; +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExtraInfoDescriptor; +import org.torproject.descriptor.NetworkStatusEntry; +import org.torproject.descriptor.ServerDescriptor; public class SanitizedBridgesReader { private ConsensusStatsFileHandler csfh; @@ -45,217 +35,92 @@ public class SanitizedBridgesReader { this.logger = Logger.getLogger(SanitizedBridgesReader.class.getName()); - SortedSet<String> bridgesImportHistory = new TreeSet<String>(); - File bridgesImportHistoryFile = - new File(statsDirectory, "bridges-import-history"); - if (keepImportHistory && bridgesImportHistoryFile.exists()) { - try { - BufferedReader br = new BufferedReader(new FileReader( - bridgesImportHistoryFile)); - String line = null; - while ((line = br.readLine()) != null) { - bridgesImportHistory.add(line); - } - br.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not read in bridge descriptor " - + "import history file. Skipping."); - } - } if (bridgesDir.exists()) { logger.fine("Importing files in directory " + bridgesDir + "/..."); - Stack<File> filesInInputDir = new Stack<File>(); - filesInInputDir.add(bridgesDir); - List<File> problems = new ArrayList<File>(); - while (!filesInInputDir.isEmpty()) { - File pop = filesInInputDir.pop(); - if (pop.isDirectory()) { - for (File f : pop.listFiles()) { - filesInInputDir.add(f); - } - continue; - } else if (keepImportHistory && bridgesImportHistory.contains( - pop.getName())) { - continue; - } else { - try { - BufferedInputStream bis = new BufferedInputStream( - new FileInputStream(pop)); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - bis.close(); - byte[] allData = baos.toByteArray(); - String fn = pop.getName(); - // TODO dateTime extraction doesn't work for sanitized network - // statuses! - String dateTime = fn.substring(0, 4) + "-" + fn.substring(4, 6) - + "-" + fn.substring(6, 8) + " " + fn.substring(9, 11) - + ":" + fn.substring(11, 13) + ":" + fn.substring(13, 15); - this.parse(allData, dateTime, true); - if (keepImportHistory) { - bridgesImportHistory.add(pop.getName()); - } - } catch (IOException e) { - problems.add(pop); - if (problems.size() > 3) { - break; - } - } - } + DescriptorReader reader = + DescriptorSourceFactory.createDescriptorReader(); + reader.addDirectory(bridgesDir); + if (keepImportHistory) { + reader.setExcludeFiles(new File(statsDirectory, + "bridge-descriptor-history")); } - if (problems.isEmpty()) { - logger.fine("Finished importing files in directory " + bridgesDir - + "/."); - } else { - StringBuilder sb = new StringBuilder("Failed importing files in " - + "directory " + bridgesDir + "/:"); - int printed = 0; - for (File f : problems) { - sb.append("\n " + f.getAbsolutePath()); - if (++printed >= 3) { - sb.append("\n ... more"); - break; + Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + if (descriptorFile.getDescriptors() != null) { + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (descriptor instanceof BridgeNetworkStatus) { + this.addBridgeNetworkStatus( + (BridgeNetworkStatus) descriptor); + } else if (descriptor instanceof ServerDescriptor) { + this.addServerDescriptor((ServerDescriptor) descriptor); + } else if (descriptor instanceof ExtraInfoDescriptor) { + this.addExtraInfoDescriptor( + (ExtraInfoDescriptor) descriptor); + } } } - logger.warning(sb.toString()); } - if (keepImportHistory) { - try { - bridgesImportHistoryFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - bridgesImportHistoryFile)); - for (String line : bridgesImportHistory) { - bw.write(line + "\n"); - } - bw.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not write bridge descriptor " - + "import history file."); - } + logger.info("Finished importing bridge descriptors."); + } + } + + private void addBridgeNetworkStatus(BridgeNetworkStatus status) { + int runningBridges = 0; + for (NetworkStatusEntry statusEntry : + status.getStatusEntries().values()) { + if (statusEntry.getFlags().contains("Running")) { + runningBridges++; } } + this.csfh.addBridgeConsensusResults(status.getPublishedMillis(), + runningBridges); } - private void parse(byte[] allData, String dateTime, boolean sanitized) { - try { - BufferedReader br = new BufferedReader(new StringReader( - new String(allData, "US-ASCII"))); - SimpleDateFormat timeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - timeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String hashedIdentity = null, platformLine = null, - publishedLine = null, geoipStartTimeLine = null, - bridgeStatsEndLine = null; - boolean skip = false; - String line = null; - while ((line = br.readLine()) != null) { - if (line.startsWith("r ")) { - int runningBridges = 0; - while ((line = br.readLine()) != null) { - if (line.startsWith("s ") && line.contains(" Running")) { - runningBridges++; - } - } - if (this.csfh != null) { - this.csfh.addBridgeConsensusResults(dateTime, runningBridges); - } - } else if (line.startsWith("router ")) { - } else if (line.startsWith("extra-info ")) { - hashedIdentity = sanitized ? line.split(" ")[2] - : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase(); - if (this.bsfh != null) { - skip = this.bsfh.isKnownRelay(hashedIdentity); - } - } else if (!skip && line.startsWith("platform ")) { - platformLine = line; - } else if (!skip && line.startsWith("published ")) { - publishedLine = line; - } else if (line.startsWith("opt fingerprint") || - line.startsWith("fingerprint")) { - String identity = line.substring(line.startsWith("opt ") ? - "opt fingerprint".length() : "fingerprint".length()). - replaceAll(" ", "").toLowerCase(); - hashedIdentity = sanitized ? identity - : DigestUtils.shaHex(identity).toUpperCase(); - } else if (!skip && line.startsWith("geoip-start-time ")) { - geoipStartTimeLine = line; - } else if (!skip && line.startsWith("geoip-client-origins") - && line.split(" ").length > 1) { - if (publishedLine == null || - geoipStartTimeLine == null) { - this.logger.warning("Either published line or " - + "geoip-start-time line is not present in " - + (sanitized ? "sanitized" : "non-sanitized") - + " bridge descriptors from " + dateTime + "."); - break; - } - long published = timeFormat.parse(publishedLine. - substring("published ".length())).getTime(); - long started = timeFormat.parse(geoipStartTimeLine. - substring("geoip-start-time ".length())).getTime(); - long seconds = (published - started) / 1000L; - double allUsers = 0.0D; - Map<String, String> obs = new HashMap<String, String>(); - String[] parts = line.split(" ")[1].split(","); - for (String p : parts) { - String country = p.substring(0, 2); - double users = ((double) Long.parseLong(p.substring(3)) - 4L) - * 86400.0D / ((double) seconds); - allUsers += users; - obs.put(country, String.format("%.2f", users)); - } - obs.put("zy", String.format("%.2f", allUsers)); - String date = publishedLine.split(" ")[1]; - String time = publishedLine.split(" ")[2]; - if (this.bsfh != null) { - this.bsfh.addObs(hashedIdentity, date, time, obs); - } - } else if (!skip && line.startsWith("bridge-stats-end ")) { - bridgeStatsEndLine = line; - } else if (!skip && line.startsWith("bridge-ips") - && line.split(" ").length > 1) { - if (bridgeStatsEndLine == null) { - this.logger.warning("bridge-ips line without preceding " - + "bridge-stats-end line in " - + (sanitized ? "sanitized" : "non-sanitized") - + " bridge descriptor."); - break; - } - double allUsers = 0.0D; - Map<String, String> obs = new HashMap<String, String>(); - String[] parts = line.split(" ")[1].split(","); - for (String p : parts) { - String country = p.substring(0, 2); - double users = (double) Long.parseLong(p.substring(3)) - 4L; - allUsers += users; - obs.put(country, String.format("%.2f", users)); - } - obs.put("zy", String.format("%.2f", allUsers)); - String date = bridgeStatsEndLine.split(" ")[1]; - String time = bridgeStatsEndLine.split(" ")[2]; - if (this.bsfh != null) { - this.bsfh.addObs(hashedIdentity, date, time, obs); - } + private void addServerDescriptor(ServerDescriptor descriptor) { + if (descriptor.getPlatform() != null && + descriptor.getPlatform().startsWith("Tor 0.2.2")) { + this.bsfh.addZeroTwoTwoDescriptor(descriptor.getFingerprint(), + descriptor.getPublishedMillis()); + } + } + + private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) { + if (!this.bsfh.isKnownRelay(descriptor.getFingerprint())) { + if (descriptor.getGeoipStartTimeMillis() >= 0 && + descriptor.getGeoipClientOrigins() != null) { + long seconds = (descriptor.getPublishedMillis() + - descriptor.getGeoipStartTimeMillis()) / 1000L; + double allUsers = 0.0D; + Map<String, String> obs = new HashMap<String, String>(); + for (Map.Entry<String, Integer> e : + descriptor.getGeoipClientOrigins().entrySet()) { + String country = e.getKey(); + double users = ((double) e.getValue() - 4) * 86400.0D + / ((double) seconds); + allUsers += users; + obs.put(country, String.format("%.2f", users)); } + obs.put("zy", String.format("%.2f", allUsers)); + this.bsfh.addObs(descriptor.getFingerprint(), + descriptor.getPublishedMillis(), obs); } - if (this.bsfh != null && platformLine != null && - platformLine.startsWith("platform Tor 0.2.2")) { - String date = publishedLine.split(" ")[1]; - String time = publishedLine.split(" ")[2]; - this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time); + if (descriptor.getBridgeStatsEndMillis() >= 0 && + descriptor.getBridgeIps() != null) { + double allUsers = 0.0D; + Map<String, String> obs = new HashMap<String, String>(); + for (Map.Entry<String, Integer> e : + descriptor.getBridgeIps().entrySet()) { + String country = e.getKey(); + double users = (double) e.getValue() - 4; + allUsers += users; + obs.put(country, String.format("%.2f", users)); + } + obs.put("zy", String.format("%.2f", allUsers)); + this.bsfh.addObs(descriptor.getFingerprint(), + descriptor.getBridgeStatsEndMillis(), obs); } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", - e); - return; - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", - e); - return; + } } }
participants (1)
-
karsten@torproject.org