tor-commits
Threads by month
- ----- 2025 -----
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
March 2012
- 19 participants
- 1205 discussions

[metrics-web/master] Make sure fingerprints and digests are lower-case.
by karsten@torproject.org 22 Mar '12
commit b8067f823c87bfc8c8a4af936922a9d34ee85bb2
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Mar 22 10:00:18 2012 +0100
Make sure fingerprints and digests are lower-case.
---
src/org/torproject/ernie/cron/ArchiveReader.java | 14 ++++++++------
1 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/src/org/torproject/ernie/cron/ArchiveReader.java b/src/org/torproject/ernie/cron/ArchiveReader.java
index 5703661..4befe51 100644
--- a/src/org/torproject/ernie/cron/ArchiveReader.java
+++ b/src/org/torproject/ernie/cron/ArchiveReader.java
@@ -109,12 +109,14 @@ public class ArchiveReader {
for (NetworkStatusEntry statusEntry :
consensus.getStatusEntries().values()) {
this.rddi.addStatusEntry(consensus.getValidAfterMillis(),
- statusEntry.getNickname(), statusEntry.getFingerprint(),
- statusEntry.getDescriptor(), statusEntry.getPublishedMillis(),
- statusEntry.getAddress(), statusEntry.getOrPort(),
- statusEntry.getDirPort(), statusEntry.getFlags(),
- statusEntry.getVersion(), statusEntry.getBandwidth(),
- statusEntry.getPortList(), statusEntry.getStatusEntryBytes());
+ statusEntry.getNickname(),
+ statusEntry.getFingerprint().toLowerCase(),
+ statusEntry.getDescriptor().toLowerCase(),
+ statusEntry.getPublishedMillis(), statusEntry.getAddress(),
+ statusEntry.getOrPort(), statusEntry.getDirPort(),
+ statusEntry.getFlags(), statusEntry.getVersion(),
+ statusEntry.getBandwidth(), statusEntry.getPortList(),
+ statusEntry.getStatusEntryBytes());
try {
this.bsfh.addHashedRelay(DigestUtils.shaHex(Hex.decodeHex(
statusEntry.getFingerprint().toCharArray())).toUpperCase());
1
0

[metrics-web/master] Clean up main method for importing relay descriptors.
by karsten@torproject.org 22 Mar '12
commit ed016c9e3552b72be8f4dc2bb46de259f226d04a
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Mar 21 15:18:23 2012 +0100
Clean up main method for importing relay descriptors.
---
src/org/torproject/ernie/cron/Main.java | 34 +++++++++++-------------------
1 files changed, 13 insertions(+), 21 deletions(-)
diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java
index 1c8ca48..8022fcc 100644
--- a/src/org/torproject/ernie/cron/Main.java
+++ b/src/org/torproject/ernie/cron/Main.java
@@ -37,28 +37,20 @@ public class Main {
new BridgeStatsFileHandler(
config.getRelayDescriptorDatabaseJDBC()) : null;
- // Prepare writing relay descriptors to database
- RelayDescriptorDatabaseImporter rddi =
- config.getWriteRelayDescriptorDatabase() ||
- config.getWriteRelayDescriptorsRawFiles() ?
- new RelayDescriptorDatabaseImporter(
- config.getWriteRelayDescriptorDatabase() ?
- config.getRelayDescriptorDatabaseJDBC() : null,
- config.getWriteRelayDescriptorsRawFiles() ?
- config.getRelayDescriptorRawFilesDirectory() : null) : null;
-
// Import relay descriptors
- if (rddi != null) {
- if (config.getImportDirectoryArchives()) {
- new ArchiveReader(rddi, bsfh,
- new File(config.getDirectoryArchivesDirectory()),
- statsDirectory,
- config.getKeepDirectoryArchiveImportHistory());
- }
- }
-
- // Close database connection (if active)
- if (rddi != null) {
+ if (config.getImportDirectoryArchives()) {
+ RelayDescriptorDatabaseImporter rddi =
+ config.getWriteRelayDescriptorDatabase() ||
+ config.getWriteRelayDescriptorsRawFiles() ?
+ new RelayDescriptorDatabaseImporter(
+ config.getWriteRelayDescriptorDatabase() ?
+ config.getRelayDescriptorDatabaseJDBC() : null,
+ config.getWriteRelayDescriptorsRawFiles() ?
+ config.getRelayDescriptorRawFilesDirectory() : null) : null;
+ new ArchiveReader(rddi, bsfh,
+ new File(config.getDirectoryArchivesDirectory()),
+ statsDirectory,
+ config.getKeepDirectoryArchiveImportHistory());
rddi.closeConnection();
}
1
0

[metrics-web/master] Use metrics-lib for parsing relay descriptors.
by karsten@torproject.org 22 Mar '12
commit 72c1aee10e13d199a2e604adb47c670c417ab0a0
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Mar 21 14:44:30 2012 +0100
Use metrics-lib for parsing relay descriptors.
---
config.template | 2 +-
src/org/torproject/ernie/cron/ArchiveReader.java | 573 +++++---------------
src/org/torproject/ernie/cron/Configuration.java | 2 +-
.../cron/RelayDescriptorDatabaseImporter.java | 32 +-
4 files changed, 155 insertions(+), 454 deletions(-)
diff --git a/config.template b/config.template
index 8c56909..ce2dc94 100644
--- a/config.template
+++ b/config.template
@@ -2,7 +2,7 @@
#ImportDirectoryArchives 0
#
## Relative path to directory to import directory archives from
-#DirectoryArchivesDirectory archives/
+#DirectoryArchivesDirectory in/relay-descriptors/
#
## Keep a history of imported directory archive files to know which files
## have been imported before. This history can be useful when importing
diff --git a/src/org/torproject/ernie/cron/ArchiveReader.java b/src/org/torproject/ernie/cron/ArchiveReader.java
index ed1e505..5703661 100644
--- a/src/org/torproject/ernie/cron/ArchiveReader.java
+++ b/src/org/torproject/ernie/cron/ArchiveReader.java
@@ -2,34 +2,29 @@
* See LICENSE for licensing information */
package org.torproject.ernie.cron;
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
+import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.Stack;
import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.RelayNetworkStatusVote;
+import org.torproject.descriptor.ServerDescriptor;
/**
* Read in all files in a given directory and pass buffered readers of
@@ -72,439 +67,159 @@ public class ArchiveReader {
int parsedFiles = 0, ignoredFiles = 0;
this.logger = Logger.getLogger(ArchiveReader.class.getName());
- SortedMap<String, Long>
- lastArchivesImportHistory = new TreeMap<String, Long>(),
- newArchivesImportHistory = new TreeMap<String, Long>();
- File archivesImportHistoryFile = new File(statsDirectory,
- "archives-import-history");
- if (keepImportHistory && archivesImportHistoryFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- archivesImportHistoryFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(",");
- if (parts.length < 2) {
- logger.warning("Archives import history file does not "
- + "contain timestamps. Skipping.");
- break;
- }
- long lastModified = Long.parseLong(parts[0]);
- String filename = parts[1];
- lastArchivesImportHistory.put(filename, lastModified);
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read in archives import "
- + "history file. Skipping.");
- }
- }
if (archivesDirectory.exists()) {
logger.fine("Importing files in directory " + archivesDirectory
+ "/...");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(archivesDirectory);
- List<File> problems = new ArrayList<File>();
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- } else {
- try {
- long lastModified = pop.lastModified();
- String filename = pop.getName();
- if (keepImportHistory) {
- newArchivesImportHistory.put(filename, lastModified);
- }
- if (keepImportHistory &&
- lastArchivesImportHistory.containsKey(filename) &&
- lastArchivesImportHistory.get(filename) >= lastModified) {
- ignoredFiles++;
- continue;
- } else if (filename.endsWith(".tar.bz2")) {
- logger.warning("Cannot parse compressed tarball "
- + pop.getAbsolutePath() + ". Skipping.");
- continue;
- }
- FileInputStream fis = new FileInputStream(pop);
- BufferedInputStream bis = new BufferedInputStream(fis);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- this.parse(allData);
- parsedFiles++;
- } catch (IOException e) {
- problems.add(pop);
- if (problems.size() > 3) {
- break;
- }
- }
- }
+ DescriptorReader reader =
+ DescriptorSourceFactory.createDescriptorReader();
+ reader.addDirectory(archivesDirectory);
+ if (keepImportHistory) {
+ reader.setExcludeFiles(new File(statsDirectory,
+ "relay-descriptor-history"));
}
- if (problems.isEmpty()) {
- logger.fine("Finished importing files in directory "
- + archivesDirectory + "/.");
- } else {
- StringBuilder sb = new StringBuilder("Failed importing files in "
- + "directory " + archivesDirectory + "/:");
- int printed = 0;
- for (File f : problems) {
- sb.append("\n " + f.getAbsolutePath());
- if (++printed >= 3) {
- sb.append("\n ... more");
- break;
+ Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ if (descriptorFile.getDescriptors() != null) {
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof RelayNetworkStatusConsensus) {
+ this.addRelayNetworkStatusConsensus(
+ (RelayNetworkStatusConsensus) descriptor);
+ } else if (descriptor instanceof RelayNetworkStatusVote) {
+ this.addRelayNetworkStatusVote(
+ (RelayNetworkStatusVote) descriptor);
+ } else if (descriptor instanceof ServerDescriptor) {
+ this.addServerDescriptor((ServerDescriptor) descriptor);
+ } else if (descriptor instanceof ExtraInfoDescriptor) {
+ this.addExtraInfoDescriptor(
+ (ExtraInfoDescriptor) descriptor);
+ }
}
}
- logger.warning(sb.toString());
- }
- }
- if (keepImportHistory) {
- try {
- archivesImportHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- archivesImportHistoryFile));
- for (Map.Entry<String, Long> historyEntry :
- newArchivesImportHistory.entrySet()) {
- bw.write(String.valueOf(historyEntry.getValue()) + ","
- + historyEntry.getKey() + "\n");
- }
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write archives import "
- + "history file.");
}
}
+
logger.info("Finished importing relay descriptors from local "
+ "directory:\nParsed " + parsedFiles + ", ignored "
+ ignoredFiles + " files.");
}
- public void parse(byte[] data) {
- try {
- /* Remove any @ lines at the beginning of the file and parse the
- * first non-@ line to find out the descriptor type. */
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = br.readLine();
- while (line != null && line.startsWith("@")) {
- line = br.readLine();
- }
- if (line == null) {
- this.logger.fine("We were given a file that doesn't contain a "
- + "single descriptor for parsing. Ignoring.");
- return;
+ private void addRelayNetworkStatusConsensus(
+ RelayNetworkStatusConsensus consensus) {
+ for (NetworkStatusEntry statusEntry :
+ consensus.getStatusEntries().values()) {
+ this.rddi.addStatusEntry(consensus.getValidAfterMillis(),
+ statusEntry.getNickname(), statusEntry.getFingerprint(),
+ statusEntry.getDescriptor(), statusEntry.getPublishedMillis(),
+ statusEntry.getAddress(), statusEntry.getOrPort(),
+ statusEntry.getDirPort(), statusEntry.getFlags(),
+ statusEntry.getVersion(), statusEntry.getBandwidth(),
+ statusEntry.getPortList(), statusEntry.getStatusEntryBytes());
+ try {
+ this.bsfh.addHashedRelay(DigestUtils.shaHex(Hex.decodeHex(
+ statusEntry.getFingerprint().toCharArray())).toUpperCase());
+ } catch (DecoderException e) {
}
- br.close();
+ }
+ this.rddi.addConsensus(consensus.getValidAfterMillis(),
+ consensus.getRawDescriptorBytes());
+ }
- /* Split the byte[] possibly containing multiple descriptors into
- * byte[]'s with one descriptor each and parse them. */
- String startToken = null;
- if (line.equals("network-status-version 3")) {
- startToken = "network-status-version 3";
- } else if (line.startsWith("router ")) {
- startToken = "router ";
- } else if (line.startsWith("extra-info ")) {
- startToken = "extra-info ";
- } else if (line.equals("dir-key-certificate-version 3")) {
- this.logger.fine("Not parsing dir key certificate.");
- return;
- } else {
- this.logger.warning("Unknown descriptor type. First line is '"
- + line + "'. Ignoring.");
- return;
- }
- String splitToken = "\n" + startToken;
- String ascii = new String(data, "US-ASCII");
- int length = data.length, start = ascii.indexOf(startToken);
- while (start < length) {
- int end = ascii.indexOf(splitToken, start);
- if (end < 0) {
- end = length;
- } else {
- end += 1;
- }
- byte[] descBytes = new byte[end - start];
- System.arraycopy(data, start, descBytes, 0, end - start);
- parseSingleDescriptor(descBytes);
- start = end;
+ private void addRelayNetworkStatusVote(RelayNetworkStatusVote vote) {
+ this.rddi.addVote(vote.getValidAfterMillis(), vote.getIdentity(),
+ vote.getRawDescriptorBytes());
+ }
+
+ private void addServerDescriptor(ServerDescriptor descriptor) {
+ String digest = null;
+ try {
+ String ascii = new String(descriptor.getRawDescriptorBytes(),
+ "US-ASCII");
+ String startToken = "router ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 || sig >= 0 || sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(descriptor.getRawDescriptorBytes(), start,
+ forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
}
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
+ } catch (UnsupportedEncodingException e) {
+ }
+ if (digest != null) {
+ this.rddi.addServerDescriptor(digest, descriptor.getNickname(),
+ descriptor.getAddress(), descriptor.getOrPort(),
+ descriptor.getDirPort(), descriptor.getFingerprint(),
+ descriptor.getBandwidthRate(), descriptor.getBandwidthBurst(),
+ descriptor.getBandwidthObserved(), descriptor.getPlatform(),
+ descriptor.getPublishedMillis(), descriptor.getUptime(),
+ descriptor.getExtraInfoDigest(),
+ descriptor.getRawDescriptorBytes());
}
}
- private void parseSingleDescriptor(byte[] data) {
+ private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) {
+ if (descriptor.getDirreqV3Reqs() != null) {
+ int allUsers = 0;
+ Map<String, String> obs = new HashMap<String, String>();
+ for (Map.Entry<String, Integer> e :
+ descriptor.getDirreqV3Reqs().entrySet()) {
+ String country = e.getKey();
+ int users = e.getValue() - 4;
+ allUsers += users;
+ obs.put(country, "" + users);
+ }
+ obs.put("zy", "" + allUsers);
+ this.rddi.addDirReqStats(descriptor.getFingerprint(),
+ descriptor.getDirreqStatsEndMillis(),
+ descriptor.getDirreqStatsIntervalLength(), obs);
+ }
+ if (descriptor.getConnBiDirectStatsEndMillis() >= 0L) {
+ this.rddi.addConnBiDirect(descriptor.getFingerprint(),
+ descriptor.getConnBiDirectStatsEndMillis(),
+ descriptor.getConnBiDirectStatsIntervalLength(),
+ descriptor.getConnBiDirectBelow(),
+ descriptor.getConnBiDirectRead(),
+ descriptor.getConnBiDirectWrite(),
+ descriptor.getConnBiDirectBoth());
+ }
+ List<String> bandwidthHistoryLines = new ArrayList<String>();
+ if (descriptor.getWriteHistory() != null) {
+ bandwidthHistoryLines.add(descriptor.getWriteHistory().getLine());
+ }
+ if (descriptor.getReadHistory() != null) {
+ bandwidthHistoryLines.add(descriptor.getReadHistory().getLine());
+ }
+ if (descriptor.getDirreqWriteHistory() != null) {
+ bandwidthHistoryLines.add(
+ descriptor.getDirreqWriteHistory().getLine());
+ }
+ if (descriptor.getDirreqReadHistory() != null) {
+ bandwidthHistoryLines.add(
+ descriptor.getDirreqReadHistory().getLine());
+ }
+ String digest = null;
try {
- /* Convert descriptor to ASCII for parsing. This means we'll lose
- * the non-ASCII chars, but we don't care about them for parsing
- * anyway. */
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = br.readLine();
- SimpleDateFormat parseFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (line.equals("network-status-version 3")) {
- // TODO when parsing the current consensus, check the fresh-until
- // time to see when we switch from hourly to half-hourly
- // consensuses
- boolean isConsensus = true;
- String validAfterTime = null, nickname = null,
- relayIdentity = null, serverDesc = null, version = null,
- ports = null;
- String dirSource = null, address = null;
- long validAfter = -1L, published = -1L, bandwidth = -1L,
- orPort = 0L, dirPort = 0L;
- SortedSet<String> relayFlags = null;
- StringBuilder rawStatusEntry = null;
- SortedSet<String> hashedRelayIdentities = new TreeSet<String>();
- while ((line = br.readLine()) != null) {
- if (line.equals("vote-status vote")) {
- isConsensus = false;
- } else if (line.startsWith("valid-after ")) {
- validAfterTime = line.substring("valid-after ".length());
- validAfter = parseFormat.parse(validAfterTime).getTime();
- } else if (line.startsWith("dir-source ")) {
- dirSource = line.split(" ")[2];
- } else if (line.startsWith("r ")) {
- if (isConsensus && relayIdentity != null &&
- this.rddi != null) {
- byte[] rawDescriptor = rawStatusEntry.toString().getBytes();
- this.rddi.addStatusEntry(validAfter, nickname,
- relayIdentity, serverDesc, published, address, orPort,
- dirPort, relayFlags, version, bandwidth, ports,
- rawDescriptor);
- relayFlags = null;
- version = null;
- bandwidth = -1L;
- ports = null;
- }
- rawStatusEntry = new StringBuilder(line + "\n");
- String[] parts = line.split(" ");
- if (parts.length < 9) {
- this.logger.log(Level.WARNING, "Could not parse r line '"
- + line + "' in descriptor. Skipping.");
- break;
- }
- nickname = parts[1];
- relayIdentity = Hex.encodeHexString(
- Base64.decodeBase64(parts[2] + "=")).
- toLowerCase();
- hashedRelayIdentities.add(DigestUtils.shaHex(
- Base64.decodeBase64(parts[2] + "=")).
- toUpperCase());
- serverDesc = Hex.encodeHexString(Base64.decodeBase64(
- parts[3] + "=")).toLowerCase();
- published = parseFormat.parse(parts[4] + " " + parts[5]).
- getTime();
- address = parts[6];
- orPort = Long.parseLong(parts[7]);
- dirPort = Long.parseLong(parts[8]);
- } else if (line.startsWith("s ") || line.equals("s")) {
- rawStatusEntry.append(line + "\n");
- relayFlags = new TreeSet<String>();
- if (line.length() > 2) {
- for (String flag : line.substring(2).split(" ")) {
- relayFlags.add(flag);
- }
- }
- } else if (line.startsWith("v ")) {
- rawStatusEntry.append(line + "\n");
- version = line.substring(2);
- } else if (line.startsWith("w ")) {
- rawStatusEntry.append(line + "\n");
- String[] parts = line.split(" ");
- for (String part : parts) {
- if (part.startsWith("Bandwidth=")) {
- bandwidth = Long.parseLong(part.substring(
- "Bandwidth=".length()));
- }
- }
- } else if (line.startsWith("p ")) {
- rawStatusEntry.append(line + "\n");
- ports = line.substring(2);
- }
- }
- if (isConsensus) {
- if (this.bsfh != null) {
- for (String hashedRelayIdentity : hashedRelayIdentities) {
- this.bsfh.addHashedRelay(hashedRelayIdentity);
- }
- }
- if (this.rddi != null) {
- this.rddi.addConsensus(validAfter, data);
- if (relayIdentity != null) {
- byte[] rawDescriptor = rawStatusEntry.toString().getBytes();
- this.rddi.addStatusEntry(validAfter, nickname,
- relayIdentity, serverDesc, published, address, orPort,
- dirPort, relayFlags, version, bandwidth, ports,
- rawDescriptor);
- }
- }
- } else {
- if (this.rddi != null) {
- this.rddi.addVote(validAfter, dirSource, data);
- }
- }
- } else if (line.startsWith("router ")) {
- String platformLine = null, bandwidthLine = null,
- extraInfoDigest = null, relayIdentifier = null;
- String[] parts = line.split(" ");
- String nickname = parts[1];
- String address = parts[2];
- int orPort = Integer.parseInt(parts[3]);
- int dirPort = Integer.parseInt(parts[4]);
- long published = -1L, uptime = -1L;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("platform ")) {
- platformLine = line;
- } else if (line.startsWith("published ")) {
- String publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- } else if (line.startsWith("opt fingerprint") ||
- line.startsWith("fingerprint")) {
- relayIdentifier = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- } else if (line.startsWith("bandwidth ")) {
- bandwidthLine = line;
- } else if (line.startsWith("opt extra-info-digest ") ||
- line.startsWith("extra-info-digest ")) {
- extraInfoDigest = line.startsWith("opt ") ?
- line.split(" ")[2].toLowerCase() :
- line.split(" ")[1].toLowerCase();
- } else if (line.startsWith("uptime ")) {
- uptime = Long.parseLong(line.substring("uptime ".length()));
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "router ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- String digest = null;
- if (start >= 0 || sig >= 0 || sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.rddi != null && digest != null) {
- String[] bwParts = bandwidthLine.split(" ");
- long bandwidthAvg = Long.parseLong(bwParts[1]);
- long bandwidthBurst = Long.parseLong(bwParts[2]);
- long bandwidthObserved = Long.parseLong(bwParts[3]);
- String platform = platformLine.substring("platform ".length());
- this.rddi.addServerDescriptor(digest, nickname, address, orPort,
- dirPort, relayIdentifier, bandwidthAvg, bandwidthBurst,
- bandwidthObserved, platform, published, uptime,
- extraInfoDigest, data);
- }
- } else if (line.startsWith("extra-info ")) {
- String nickname = line.split(" ")[1];
- long published = -1L;
- String dir = line.split(" ")[2];
- String statsEnd = null;
- long seconds = -1L;
- List<String> bandwidthHistory = new ArrayList<String>();
- while ((line = br.readLine()) != null) {
- if (line.startsWith("published ")) {
- String publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- } else if (line.startsWith("read-history ") ||
- line.startsWith("write-history ") ||
- line.startsWith("dirreq-read-history ") ||
- line.startsWith("dirreq-write-history ")) {
- bandwidthHistory.add(line);
- } else if (line.startsWith("dirreq-stats-end ")) {
- String[] parts = line.split(" ");
- if (parts.length < 5) {
- this.logger.warning("Could not parse dirreq-stats-end "
- + "line '" + line + "' in descriptor. Skipping.");
- break;
- }
- statsEnd = parts[1] + " " + parts[2];
- seconds = Long.parseLong(parts[3].substring(1));
- } else if (line.startsWith("dirreq-v3-reqs ")
- && line.length() > "dirreq-v3-reqs ".length()) {
- if (this.rddi != null) {
- try {
- int allUsers = 0;
- Map<String, String> obs = new HashMap<String, String>();
- String[] parts = line.substring("dirreq-v3-reqs ".
- length()).split(",");
- for (String p : parts) {
- String country = p.substring(0, 2);
- int users = Integer.parseInt(p.substring(3)) - 4;
- allUsers += users;
- obs.put(country, "" + users);
- }
- obs.put("zy", "" + allUsers);
- this.rddi.addDirReqStats(dir, statsEnd, seconds, obs);
- } catch (NumberFormatException e) {
- this.logger.log(Level.WARNING, "Could not parse "
- + "dirreq-v3-reqs line '" + line + "' in descriptor. "
- + "Skipping.", e);
- break;
- }
- }
- } else if (line.startsWith("conn-bi-direct ")) {
- if (this.rddi != null) {
- String[] parts = line.split(" ");
- if (parts.length == 6 &&
- parts[5].split(",").length == 4) {
- try {
- String connBiDirectStatsEnd = parts[1] + " " + parts[2];
- long connBiDirectSeconds = Long.parseLong(parts[3].
- substring(1));
- String[] parts2 = parts[5].split(",");
- long below = Long.parseLong(parts2[0]);
- long read = Long.parseLong(parts2[1]);
- long write = Long.parseLong(parts2[2]);
- long both = Long.parseLong(parts2[3]);
- this.rddi.addConnBiDirect(dir, connBiDirectStatsEnd,
- connBiDirectSeconds, below, read, write, both);
- } catch (NumberFormatException e) {
- this.logger.log(Level.WARNING, "Number format "
- + "exception while parsing conn-bi-direct stats "
- + "string '" + line + "'. Skipping.", e);
- }
- } else {
- this.logger.warning("Skipping invalid conn-bi-direct "
- + "stats string '" + line + "'.");
- }
- }
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String digest = null;
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 || sig >= 0 || sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.rddi != null && digest != null) {
- this.rddi.addExtraInfoDescriptor(digest, nickname,
- dir.toLowerCase(), published, data, bandwidthHistory);
- }
+ String ascii = new String(descriptor.getRawDescriptorBytes(),
+ "US-ASCII");
+ String startToken = "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 || sig >= 0 || sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(descriptor.getRawDescriptorBytes(), start,
+ forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
}
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
+ } catch (UnsupportedEncodingException e) {
+ }
+ if (digest != null) {
+ this.rddi.addExtraInfoDescriptor(digest, descriptor.getNickname(),
+ descriptor.getFingerprint().toLowerCase(),
+ descriptor.getPublishedMillis(),
+ descriptor.getRawDescriptorBytes(), bandwidthHistoryLines);
}
}
}
diff --git a/src/org/torproject/ernie/cron/Configuration.java b/src/org/torproject/ernie/cron/Configuration.java
index 0a493cb..1f30eb9 100644
--- a/src/org/torproject/ernie/cron/Configuration.java
+++ b/src/org/torproject/ernie/cron/Configuration.java
@@ -17,7 +17,7 @@ import java.util.logging.Logger;
*/
public class Configuration {
private boolean importDirectoryArchives = false;
- private String directoryArchivesDirectory = "archives/";
+ private String directoryArchivesDirectory = "in/relay-descriptors/";
private boolean keepDirectoryArchiveImportHistory = false;
private boolean importSanitizedBridges = false;
private String sanitizedBridgesDirectory = "bridges/";
diff --git a/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java b/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
index 35ba90e..3ee7b6b 100644
--- a/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
+++ b/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
@@ -1017,21 +1017,14 @@ public final class RelayDescriptorDatabaseImporter {
/**
* Insert a conn-bi-direct stats string into the database.
*/
- public void addConnBiDirect(String source, String statsEnd,
+ public void addConnBiDirect(String source, long statsEndMillis,
long seconds, long below, long read, long write, long both) {
- long statsEndTime = 0L;
- try {
- statsEndTime = this.dateTimeFormat.parse(statsEnd).getTime();
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not add conn-bi-direct "
- + "stats string with interval ending '" + statsEnd + "'.", e);
- return;
- }
+ String statsEnd = this.dateTimeFormat.format(statsEndMillis);
if (this.importIntoDatabase) {
try {
- this.addDateToScheduledUpdates(statsEndTime);
+ this.addDateToScheduledUpdates(statsEndMillis);
Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- Timestamp statsEndTimestamp = new Timestamp(statsEndTime);
+ Timestamp statsEndTimestamp = new Timestamp(statsEndMillis);
this.psBs.setString(1, source);
this.psBs.setTimestamp(2, statsEndTimestamp, cal);
ResultSet rs = psBs.executeQuery();
@@ -1085,21 +1078,14 @@ public final class RelayDescriptorDatabaseImporter {
* Adds observations on the number of directory requests by country as
* seen on a directory at a given date to the database.
*/
- public void addDirReqStats(String source, String statsEnd, long seconds,
- Map<String, String> dirReqsPerCountry) {
- long statsEndTime = 0L;
- try {
- statsEndTime = this.dateTimeFormat.parse(statsEnd).getTime();
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not add dirreq stats with "
- + "interval ending '" + statsEnd + "'.", e);
- return;
- }
+ public void addDirReqStats(String source, long statsEndMillis,
+ long seconds, Map<String, String> dirReqsPerCountry) {
+ String statsEnd = this.dateTimeFormat.format(statsEndMillis);
if (this.importIntoDatabase) {
try {
- this.addDateToScheduledUpdates(statsEndTime);
+ this.addDateToScheduledUpdates(statsEndMillis);
Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- Timestamp statsEndTimestamp = new Timestamp(statsEndTime);
+ Timestamp statsEndTimestamp = new Timestamp(statsEndMillis);
this.psQs.setString(1, source);
this.psQs.setTimestamp(2, statsEndTimestamp, cal);
ResultSet rs = psQs.executeQuery();
1
0
commit bfa21af59bdae4ee9c924792e6606a7829d0941c
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Mar 21 16:23:37 2012 +0100
Clean up ArchiveReader a bit.
---
src/org/torproject/ernie/cron/ArchiveReader.java | 10 +---------
1 files changed, 1 insertions(+), 9 deletions(-)
diff --git a/src/org/torproject/ernie/cron/ArchiveReader.java b/src/org/torproject/ernie/cron/ArchiveReader.java
index 4befe51..e4f6b2a 100644
--- a/src/org/torproject/ernie/cron/ArchiveReader.java
+++ b/src/org/torproject/ernie/cron/ArchiveReader.java
@@ -48,8 +48,6 @@ public class ArchiveReader {
*/
private Logger logger;
- private SimpleDateFormat dateTimeFormat;
-
public ArchiveReader(RelayDescriptorDatabaseImporter rddi,
BridgeStatsFileHandler bsfh, File archivesDirectory,
File statsDirectory, boolean keepImportHistory) {
@@ -62,10 +60,6 @@ public class ArchiveReader {
this.rddi = rddi;
this.bsfh = bsfh;
- this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-
- int parsedFiles = 0, ignoredFiles = 0;
this.logger = Logger.getLogger(ArchiveReader.class.getName());
if (archivesDirectory.exists()) {
logger.fine("Importing files in directory " + archivesDirectory
@@ -99,9 +93,7 @@ public class ArchiveReader {
}
}
- logger.info("Finished importing relay descriptors from local "
- + "directory:\nParsed " + parsedFiles + ", ignored "
- + ignoredFiles + " files.");
+ logger.info("Finished importing relay descriptors.");
}
private void addRelayNetworkStatusConsensus(
1
0

[metrics-web/master] Use metrics-lib for parsing bridge descriptors, too.
by karsten@torproject.org 22 Mar '12
by karsten@torproject.org 22 Mar '12
22 Mar '12
commit 333a9c497b8558d584af7e27d953f50ad800aa11
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Mar 21 16:24:23 2012 +0100
Use metrics-lib for parsing bridge descriptors, too.
---
.../ernie/cron/BridgeStatsFileHandler.java | 27 ++-
.../ernie/cron/ConsensusStatsFileHandler.java | 8 +-
.../ernie/cron/SanitizedBridgesReader.java | 299 ++++++--------------
3 files changed, 111 insertions(+), 223 deletions(-)
diff --git a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
index 6e7f4eb..aba7804 100644
--- a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
+++ b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
@@ -14,12 +14,15 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
+import java.util.TimeZone;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.logging.Level;
@@ -95,6 +98,8 @@ public class BridgeStatsFileHandler {
/* Database connection string. */
private String connectionURL = null;
+ private SimpleDateFormat dateTimeFormat;
+
/**
* Initializes this class, including reading in intermediate results
* files <code>stats/bridge-stats-raw</code> and
@@ -122,6 +127,9 @@ public class BridgeStatsFileHandler {
/* Initialize database connection string. */
this.connectionURL = connectionURL;
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
/* Initialize logger. */
this.logger = Logger.getLogger(
BridgeStatsFileHandler.class.getName());
@@ -175,7 +183,9 @@ public class BridgeStatsFileHandler {
obs.put(headers[i], parts[i]);
}
}
- this.addObs(hashedBridgeIdentity, date, time, obs);
+ long dateTimeMillis = dateTimeFormat.parse(date + " "
+ + time).getTime();
+ this.addObs(hashedBridgeIdentity, dateTimeMillis, obs);
}
}
}
@@ -185,6 +195,9 @@ public class BridgeStatsFileHandler {
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to read file "
+ this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
}
}
@@ -253,9 +266,10 @@ public class BridgeStatsFileHandler {
* not included in the results, because stats are very likely broken.
*/
public void addZeroTwoTwoDescriptor(String hashedBridgeIdentity,
- String date, String time) {
- String value = hashedBridgeIdentity.toUpperCase() + "," + date + ","
- + time;
+ long publishedMillis) {
+ String value = hashedBridgeIdentity.toUpperCase() + ","
+ + this.dateTimeFormat.format(publishedMillis).
+ replaceAll(" ", ",");
if (!this.zeroTwoTwoDescriptors.contains(value)) {
this.logger.finer("Adding new bridge 0.2.2.x extra-info "
+ "descriptor: " + value);
@@ -281,11 +295,14 @@ public class BridgeStatsFileHandler {
* bridge and day, we keep the one with the later publication time and
* discard the other one.
*/
- public void addObs(String hashedIdentity, String date, String time,
+ public void addObs(String hashedIdentity, long publishedMillis,
Map<String, String> obs) {
for (String country : obs.keySet()) {
this.countries.add(country);
}
+ String dateTime = this.dateTimeFormat.format(publishedMillis);
+ String date = dateTime.split(" ")[0];
+ String time = dateTime.split(" ")[1];
String shortKey = hashedIdentity + "," + date;
String longKey = shortKey + "," + time;
SortedMap<String, Map<String, String>> tailMap =
diff --git a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
index 6c83b05..fccb29b 100644
--- a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
+++ b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
@@ -66,6 +66,8 @@ public class ConsensusStatsFileHandler {
/* Database connection string. */
private String connectionURL = null;
+ private SimpleDateFormat dateTimeFormat;
+
/**
* Initializes this class, including reading in intermediate results
* files <code>stats/consensus-stats-raw</code> and
@@ -86,6 +88,9 @@ public class ConsensusStatsFileHandler {
/* Initialize database connection string. */
this.connectionURL = connectionURL;
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
/* Initialize logger. */
this.logger = Logger.getLogger(
ConsensusStatsFileHandler.class.getName());
@@ -128,7 +133,8 @@ public class ConsensusStatsFileHandler {
* Adds the intermediate results of the number of running bridges in a
* given bridge status to the existing observations.
*/
- public void addBridgeConsensusResults(String published, int running) {
+ public void addBridgeConsensusResults(long publishedMillis, int running) {
+ String published = dateTimeFormat.format(publishedMillis);
String line = published + "," + running;
if (!this.bridgesRaw.containsKey(published)) {
this.logger.finer("Adding new bridge numbers: " + line);
diff --git a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
index 78bd7db..c7199cd 100644
--- a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
+++ b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
@@ -2,30 +2,20 @@
* See LICENSE for licensing information */
package org.torproject.ernie.cron;
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
import java.util.HashMap;
-import java.util.List;
+import java.util.Iterator;
import java.util.Map;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.commons.codec.digest.DigestUtils;
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.ServerDescriptor;
public class SanitizedBridgesReader {
private ConsensusStatsFileHandler csfh;
@@ -45,217 +35,92 @@ public class SanitizedBridgesReader {
this.logger =
Logger.getLogger(SanitizedBridgesReader.class.getName());
- SortedSet<String> bridgesImportHistory = new TreeSet<String>();
- File bridgesImportHistoryFile =
- new File(statsDirectory, "bridges-import-history");
- if (keepImportHistory && bridgesImportHistoryFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- bridgesImportHistoryFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- bridgesImportHistory.add(line);
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read in bridge descriptor "
- + "import history file. Skipping.");
- }
- }
if (bridgesDir.exists()) {
logger.fine("Importing files in directory " + bridgesDir + "/...");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(bridgesDir);
- List<File> problems = new ArrayList<File>();
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- continue;
- } else if (keepImportHistory && bridgesImportHistory.contains(
- pop.getName())) {
- continue;
- } else {
- try {
- BufferedInputStream bis = new BufferedInputStream(
- new FileInputStream(pop));
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- String fn = pop.getName();
- // TODO dateTime extraction doesn't work for sanitized network
- // statuses!
- String dateTime = fn.substring(0, 4) + "-" + fn.substring(4, 6)
- + "-" + fn.substring(6, 8) + " " + fn.substring(9, 11)
- + ":" + fn.substring(11, 13) + ":" + fn.substring(13, 15);
- this.parse(allData, dateTime, true);
- if (keepImportHistory) {
- bridgesImportHistory.add(pop.getName());
- }
- } catch (IOException e) {
- problems.add(pop);
- if (problems.size() > 3) {
- break;
- }
- }
- }
+ DescriptorReader reader =
+ DescriptorSourceFactory.createDescriptorReader();
+ reader.addDirectory(bridgesDir);
+ if (keepImportHistory) {
+ reader.setExcludeFiles(new File(statsDirectory,
+ "bridge-descriptor-history"));
}
- if (problems.isEmpty()) {
- logger.fine("Finished importing files in directory " + bridgesDir
- + "/.");
- } else {
- StringBuilder sb = new StringBuilder("Failed importing files in "
- + "directory " + bridgesDir + "/:");
- int printed = 0;
- for (File f : problems) {
- sb.append("\n " + f.getAbsolutePath());
- if (++printed >= 3) {
- sb.append("\n ... more");
- break;
+ Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ if (descriptorFile.getDescriptors() != null) {
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof BridgeNetworkStatus) {
+ this.addBridgeNetworkStatus(
+ (BridgeNetworkStatus) descriptor);
+ } else if (descriptor instanceof ServerDescriptor) {
+ this.addServerDescriptor((ServerDescriptor) descriptor);
+ } else if (descriptor instanceof ExtraInfoDescriptor) {
+ this.addExtraInfoDescriptor(
+ (ExtraInfoDescriptor) descriptor);
+ }
}
}
- logger.warning(sb.toString());
}
- if (keepImportHistory) {
- try {
- bridgesImportHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- bridgesImportHistoryFile));
- for (String line : bridgesImportHistory) {
- bw.write(line + "\n");
- }
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write bridge descriptor "
- + "import history file.");
- }
+ logger.info("Finished importing bridge descriptors.");
+ }
+ }
+
+ private void addBridgeNetworkStatus(BridgeNetworkStatus status) {
+ int runningBridges = 0;
+ for (NetworkStatusEntry statusEntry :
+ status.getStatusEntries().values()) {
+ if (statusEntry.getFlags().contains("Running")) {
+ runningBridges++;
}
}
+ this.csfh.addBridgeConsensusResults(status.getPublishedMillis(),
+ runningBridges);
}
- private void parse(byte[] allData, String dateTime, boolean sanitized) {
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- SimpleDateFormat timeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String hashedIdentity = null, platformLine = null,
- publishedLine = null, geoipStartTimeLine = null,
- bridgeStatsEndLine = null;
- boolean skip = false;
- String line = null;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("r ")) {
- int runningBridges = 0;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("s ") && line.contains(" Running")) {
- runningBridges++;
- }
- }
- if (this.csfh != null) {
- this.csfh.addBridgeConsensusResults(dateTime, runningBridges);
- }
- } else if (line.startsWith("router ")) {
- } else if (line.startsWith("extra-info ")) {
- hashedIdentity = sanitized ? line.split(" ")[2]
- : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase();
- if (this.bsfh != null) {
- skip = this.bsfh.isKnownRelay(hashedIdentity);
- }
- } else if (!skip && line.startsWith("platform ")) {
- platformLine = line;
- } else if (!skip && line.startsWith("published ")) {
- publishedLine = line;
- } else if (line.startsWith("opt fingerprint") ||
- line.startsWith("fingerprint")) {
- String identity = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- hashedIdentity = sanitized ? identity
- : DigestUtils.shaHex(identity).toUpperCase();
- } else if (!skip && line.startsWith("geoip-start-time ")) {
- geoipStartTimeLine = line;
- } else if (!skip && line.startsWith("geoip-client-origins")
- && line.split(" ").length > 1) {
- if (publishedLine == null ||
- geoipStartTimeLine == null) {
- this.logger.warning("Either published line or "
- + "geoip-start-time line is not present in "
- + (sanitized ? "sanitized" : "non-sanitized")
- + " bridge descriptors from " + dateTime + ".");
- break;
- }
- long published = timeFormat.parse(publishedLine.
- substring("published ".length())).getTime();
- long started = timeFormat.parse(geoipStartTimeLine.
- substring("geoip-start-time ".length())).getTime();
- long seconds = (published - started) / 1000L;
- double allUsers = 0.0D;
- Map<String, String> obs = new HashMap<String, String>();
- String[] parts = line.split(" ")[1].split(",");
- for (String p : parts) {
- String country = p.substring(0, 2);
- double users = ((double) Long.parseLong(p.substring(3)) - 4L)
- * 86400.0D / ((double) seconds);
- allUsers += users;
- obs.put(country, String.format("%.2f", users));
- }
- obs.put("zy", String.format("%.2f", allUsers));
- String date = publishedLine.split(" ")[1];
- String time = publishedLine.split(" ")[2];
- if (this.bsfh != null) {
- this.bsfh.addObs(hashedIdentity, date, time, obs);
- }
- } else if (!skip && line.startsWith("bridge-stats-end ")) {
- bridgeStatsEndLine = line;
- } else if (!skip && line.startsWith("bridge-ips")
- && line.split(" ").length > 1) {
- if (bridgeStatsEndLine == null) {
- this.logger.warning("bridge-ips line without preceding "
- + "bridge-stats-end line in "
- + (sanitized ? "sanitized" : "non-sanitized")
- + " bridge descriptor.");
- break;
- }
- double allUsers = 0.0D;
- Map<String, String> obs = new HashMap<String, String>();
- String[] parts = line.split(" ")[1].split(",");
- for (String p : parts) {
- String country = p.substring(0, 2);
- double users = (double) Long.parseLong(p.substring(3)) - 4L;
- allUsers += users;
- obs.put(country, String.format("%.2f", users));
- }
- obs.put("zy", String.format("%.2f", allUsers));
- String date = bridgeStatsEndLine.split(" ")[1];
- String time = bridgeStatsEndLine.split(" ")[2];
- if (this.bsfh != null) {
- this.bsfh.addObs(hashedIdentity, date, time, obs);
- }
+ private void addServerDescriptor(ServerDescriptor descriptor) {
+ if (descriptor.getPlatform() != null &&
+ descriptor.getPlatform().startsWith("Tor 0.2.2")) {
+ this.bsfh.addZeroTwoTwoDescriptor(descriptor.getFingerprint(),
+ descriptor.getPublishedMillis());
+ }
+ }
+
+ private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) {
+ if (!this.bsfh.isKnownRelay(descriptor.getFingerprint())) {
+ if (descriptor.getGeoipStartTimeMillis() >= 0 &&
+ descriptor.getGeoipClientOrigins() != null) {
+ long seconds = (descriptor.getPublishedMillis()
+ - descriptor.getGeoipStartTimeMillis()) / 1000L;
+ double allUsers = 0.0D;
+ Map<String, String> obs = new HashMap<String, String>();
+ for (Map.Entry<String, Integer> e :
+ descriptor.getGeoipClientOrigins().entrySet()) {
+ String country = e.getKey();
+ double users = ((double) e.getValue() - 4) * 86400.0D
+ / ((double) seconds);
+ allUsers += users;
+ obs.put(country, String.format("%.2f", users));
}
+ obs.put("zy", String.format("%.2f", allUsers));
+ this.bsfh.addObs(descriptor.getFingerprint(),
+ descriptor.getPublishedMillis(), obs);
}
- if (this.bsfh != null && platformLine != null &&
- platformLine.startsWith("platform Tor 0.2.2")) {
- String date = publishedLine.split(" ")[1];
- String time = publishedLine.split(" ")[2];
- this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time);
+ if (descriptor.getBridgeStatsEndMillis() >= 0 &&
+ descriptor.getBridgeIps() != null) {
+ double allUsers = 0.0D;
+ Map<String, String> obs = new HashMap<String, String>();
+ for (Map.Entry<String, Integer> e :
+ descriptor.getBridgeIps().entrySet()) {
+ String country = e.getKey();
+ double users = (double) e.getValue() - 4;
+ allUsers += users;
+ obs.put(country, String.format("%.2f", users));
+ }
+ obs.put("zy", String.format("%.2f", allUsers));
+ this.bsfh.addObs(descriptor.getFingerprint(),
+ descriptor.getBridgeStatsEndMillis(), obs);
}
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
- e);
- return;
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
- e);
- return;
+
}
}
}
1
0

[metrics-web/master] Change default location of sanitized bridges.
by karsten@torproject.org 22 Mar '12
by karsten@torproject.org 22 Mar '12
22 Mar '12
commit ec61104d8e950bb759a9318f76a6a6afd5ef5fe3
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Mar 22 11:40:22 2012 +0100
Change default location of sanitized bridges.
---
config.template | 2 +-
src/org/torproject/ernie/cron/Configuration.java | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/config.template b/config.template
index ce2dc94..9760b56 100644
--- a/config.template
+++ b/config.template
@@ -14,7 +14,7 @@
#ImportSanitizedBridges 0
#
## Relative path to directory to import sanitized bridges from
-#SanitizedBridgesDirectory bridges/
+#SanitizedBridgesDirectory in/bridge-descriptors/
#
## Keep a history of imported sanitized bridge descriptors. This history
## can be useful when importing from a changing data source to avoid
diff --git a/src/org/torproject/ernie/cron/Configuration.java b/src/org/torproject/ernie/cron/Configuration.java
index 1f30eb9..82a004b 100644
--- a/src/org/torproject/ernie/cron/Configuration.java
+++ b/src/org/torproject/ernie/cron/Configuration.java
@@ -20,7 +20,7 @@ public class Configuration {
private String directoryArchivesDirectory = "in/relay-descriptors/";
private boolean keepDirectoryArchiveImportHistory = false;
private boolean importSanitizedBridges = false;
- private String sanitizedBridgesDirectory = "bridges/";
+ private String sanitizedBridgesDirectory = "in/bridge-descriptors/";
private boolean keepSanitizedBridgesImportHistory = false;
private boolean writeRelayDescriptorDatabase = false;
private String relayDescriptorDatabaseJdbc =
1
0

22 Mar '12
commit a2b00950c8b4991e20e3026c04f8b5b49ce530d3
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Mar 21 12:20:01 2012 +0100
Remove relay descriptor parser.
The purpose of this commit is to prepare adding metrics-lib as library for
descriptor parsing.
---
src/org/torproject/ernie/cron/ArchiveReader.java | 367 ++++++++++++++++++-
src/org/torproject/ernie/cron/Main.java | 9 +-
.../ernie/cron/RelayDescriptorParser.java | 387 --------------------
3 files changed, 365 insertions(+), 398 deletions(-)
diff --git a/src/org/torproject/ernie/cron/ArchiveReader.java b/src/org/torproject/ernie/cron/ArchiveReader.java
index 20a0905..ed1e505 100644
--- a/src/org/torproject/ernie/cron/ArchiveReader.java
+++ b/src/org/torproject/ernie/cron/ArchiveReader.java
@@ -11,30 +11,67 @@ import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
+import java.util.SortedSet;
import java.util.Stack;
+import java.util.TimeZone;
import java.util.TreeMap;
+import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+
/**
* Read in all files in a given directory and pass buffered readers of
* them to the relay descriptor parser.
*/
public class ArchiveReader {
- public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory,
+
+ /**
+ * Stats file handler that accepts parse results for bridge statistics.
+ */
+ private BridgeStatsFileHandler bsfh;
+
+ /**
+ * Relay descriptor database importer that stores relay descriptor
+ * contents for later evaluation.
+ */
+ private RelayDescriptorDatabaseImporter rddi;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ private SimpleDateFormat dateTimeFormat;
+
+ public ArchiveReader(RelayDescriptorDatabaseImporter rddi,
+ BridgeStatsFileHandler bsfh, File archivesDirectory,
File statsDirectory, boolean keepImportHistory) {
- if (rdp == null || archivesDirectory == null ||
+ if (archivesDirectory == null ||
statsDirectory == null) {
throw new IllegalArgumentException();
}
+ this.rddi = rddi;
+ this.bsfh = bsfh;
+
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
int parsedFiles = 0, ignoredFiles = 0;
- Logger logger = Logger.getLogger(ArchiveReader.class.getName());
+ this.logger = Logger.getLogger(ArchiveReader.class.getName());
SortedMap<String, Long>
lastArchivesImportHistory = new TreeMap<String, Long>(),
newArchivesImportHistory = new TreeMap<String, Long>();
@@ -101,7 +138,7 @@ public class ArchiveReader {
}
bis.close();
byte[] allData = baos.toByteArray();
- rdp.parse(allData);
+ this.parse(allData);
parsedFiles++;
} catch (IOException e) {
problems.add(pop);
@@ -148,5 +185,327 @@ public class ArchiveReader {
+ "directory:\nParsed " + parsedFiles + ", ignored "
+ ignoredFiles + " files.");
}
+
+ public void parse(byte[] data) {
+ try {
+ /* Remove any @ lines at the beginning of the file and parse the
+ * first non-@ line to find out the descriptor type. */
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line = br.readLine();
+ while (line != null && line.startsWith("@")) {
+ line = br.readLine();
+ }
+ if (line == null) {
+ this.logger.fine("We were given a file that doesn't contain a "
+ + "single descriptor for parsing. Ignoring.");
+ return;
+ }
+ br.close();
+
+ /* Split the byte[] possibly containing multiple descriptors into
+ * byte[]'s with one descriptor each and parse them. */
+ String startToken = null;
+ if (line.equals("network-status-version 3")) {
+ startToken = "network-status-version 3";
+ } else if (line.startsWith("router ")) {
+ startToken = "router ";
+ } else if (line.startsWith("extra-info ")) {
+ startToken = "extra-info ";
+ } else if (line.equals("dir-key-certificate-version 3")) {
+ this.logger.fine("Not parsing dir key certificate.");
+ return;
+ } else {
+ this.logger.warning("Unknown descriptor type. First line is '"
+ + line + "'. Ignoring.");
+ return;
+ }
+ String splitToken = "\n" + startToken;
+ String ascii = new String(data, "US-ASCII");
+ int length = data.length, start = ascii.indexOf(startToken);
+ while (start < length) {
+ int end = ascii.indexOf(splitToken, start);
+ if (end < 0) {
+ end = length;
+ } else {
+ end += 1;
+ }
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(data, start, descBytes, 0, end - start);
+ parseSingleDescriptor(descBytes);
+ start = end;
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ + "Skipping.", e);
+ }
+ }
+
+ private void parseSingleDescriptor(byte[] data) {
+ try {
+ /* Convert descriptor to ASCII for parsing. This means we'll lose
+ * the non-ASCII chars, but we don't care about them for parsing
+ * anyway. */
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line = br.readLine();
+ SimpleDateFormat parseFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (line.equals("network-status-version 3")) {
+ // TODO when parsing the current consensus, check the fresh-until
+ // time to see when we switch from hourly to half-hourly
+ // consensuses
+ boolean isConsensus = true;
+ String validAfterTime = null, nickname = null,
+ relayIdentity = null, serverDesc = null, version = null,
+ ports = null;
+ String dirSource = null, address = null;
+ long validAfter = -1L, published = -1L, bandwidth = -1L,
+ orPort = 0L, dirPort = 0L;
+ SortedSet<String> relayFlags = null;
+ StringBuilder rawStatusEntry = null;
+ SortedSet<String> hashedRelayIdentities = new TreeSet<String>();
+ while ((line = br.readLine()) != null) {
+ if (line.equals("vote-status vote")) {
+ isConsensus = false;
+ } else if (line.startsWith("valid-after ")) {
+ validAfterTime = line.substring("valid-after ".length());
+ validAfter = parseFormat.parse(validAfterTime).getTime();
+ } else if (line.startsWith("dir-source ")) {
+ dirSource = line.split(" ")[2];
+ } else if (line.startsWith("r ")) {
+ if (isConsensus && relayIdentity != null &&
+ this.rddi != null) {
+ byte[] rawDescriptor = rawStatusEntry.toString().getBytes();
+ this.rddi.addStatusEntry(validAfter, nickname,
+ relayIdentity, serverDesc, published, address, orPort,
+ dirPort, relayFlags, version, bandwidth, ports,
+ rawDescriptor);
+ relayFlags = null;
+ version = null;
+ bandwidth = -1L;
+ ports = null;
+ }
+ rawStatusEntry = new StringBuilder(line + "\n");
+ String[] parts = line.split(" ");
+ if (parts.length < 9) {
+ this.logger.log(Level.WARNING, "Could not parse r line '"
+ + line + "' in descriptor. Skipping.");
+ break;
+ }
+ nickname = parts[1];
+ relayIdentity = Hex.encodeHexString(
+ Base64.decodeBase64(parts[2] + "=")).
+ toLowerCase();
+ hashedRelayIdentities.add(DigestUtils.shaHex(
+ Base64.decodeBase64(parts[2] + "=")).
+ toUpperCase());
+ serverDesc = Hex.encodeHexString(Base64.decodeBase64(
+ parts[3] + "=")).toLowerCase();
+ published = parseFormat.parse(parts[4] + " " + parts[5]).
+ getTime();
+ address = parts[6];
+ orPort = Long.parseLong(parts[7]);
+ dirPort = Long.parseLong(parts[8]);
+ } else if (line.startsWith("s ") || line.equals("s")) {
+ rawStatusEntry.append(line + "\n");
+ relayFlags = new TreeSet<String>();
+ if (line.length() > 2) {
+ for (String flag : line.substring(2).split(" ")) {
+ relayFlags.add(flag);
+ }
+ }
+ } else if (line.startsWith("v ")) {
+ rawStatusEntry.append(line + "\n");
+ version = line.substring(2);
+ } else if (line.startsWith("w ")) {
+ rawStatusEntry.append(line + "\n");
+ String[] parts = line.split(" ");
+ for (String part : parts) {
+ if (part.startsWith("Bandwidth=")) {
+ bandwidth = Long.parseLong(part.substring(
+ "Bandwidth=".length()));
+ }
+ }
+ } else if (line.startsWith("p ")) {
+ rawStatusEntry.append(line + "\n");
+ ports = line.substring(2);
+ }
+ }
+ if (isConsensus) {
+ if (this.bsfh != null) {
+ for (String hashedRelayIdentity : hashedRelayIdentities) {
+ this.bsfh.addHashedRelay(hashedRelayIdentity);
+ }
+ }
+ if (this.rddi != null) {
+ this.rddi.addConsensus(validAfter, data);
+ if (relayIdentity != null) {
+ byte[] rawDescriptor = rawStatusEntry.toString().getBytes();
+ this.rddi.addStatusEntry(validAfter, nickname,
+ relayIdentity, serverDesc, published, address, orPort,
+ dirPort, relayFlags, version, bandwidth, ports,
+ rawDescriptor);
+ }
+ }
+ } else {
+ if (this.rddi != null) {
+ this.rddi.addVote(validAfter, dirSource, data);
+ }
+ }
+ } else if (line.startsWith("router ")) {
+ String platformLine = null, bandwidthLine = null,
+ extraInfoDigest = null, relayIdentifier = null;
+ String[] parts = line.split(" ");
+ String nickname = parts[1];
+ String address = parts[2];
+ int orPort = Integer.parseInt(parts[3]);
+ int dirPort = Integer.parseInt(parts[4]);
+ long published = -1L, uptime = -1L;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("platform ")) {
+ platformLine = line;
+ } else if (line.startsWith("published ")) {
+ String publishedTime = line.substring("published ".length());
+ published = parseFormat.parse(publishedTime).getTime();
+ } else if (line.startsWith("opt fingerprint") ||
+ line.startsWith("fingerprint")) {
+ relayIdentifier = line.substring(line.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ } else if (line.startsWith("bandwidth ")) {
+ bandwidthLine = line;
+ } else if (line.startsWith("opt extra-info-digest ") ||
+ line.startsWith("extra-info-digest ")) {
+ extraInfoDigest = line.startsWith("opt ") ?
+ line.split(" ")[2].toLowerCase() :
+ line.split(" ")[1].toLowerCase();
+ } else if (line.startsWith("uptime ")) {
+ uptime = Long.parseLong(line.substring("uptime ".length()));
+ }
+ }
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "router ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ String digest = null;
+ if (start >= 0 || sig >= 0 || sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
+ }
+ if (this.rddi != null && digest != null) {
+ String[] bwParts = bandwidthLine.split(" ");
+ long bandwidthAvg = Long.parseLong(bwParts[1]);
+ long bandwidthBurst = Long.parseLong(bwParts[2]);
+ long bandwidthObserved = Long.parseLong(bwParts[3]);
+ String platform = platformLine.substring("platform ".length());
+ this.rddi.addServerDescriptor(digest, nickname, address, orPort,
+ dirPort, relayIdentifier, bandwidthAvg, bandwidthBurst,
+ bandwidthObserved, platform, published, uptime,
+ extraInfoDigest, data);
+ }
+ } else if (line.startsWith("extra-info ")) {
+ String nickname = line.split(" ")[1];
+ long published = -1L;
+ String dir = line.split(" ")[2];
+ String statsEnd = null;
+ long seconds = -1L;
+ List<String> bandwidthHistory = new ArrayList<String>();
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("published ")) {
+ String publishedTime = line.substring("published ".length());
+ published = parseFormat.parse(publishedTime).getTime();
+ } else if (line.startsWith("read-history ") ||
+ line.startsWith("write-history ") ||
+ line.startsWith("dirreq-read-history ") ||
+ line.startsWith("dirreq-write-history ")) {
+ bandwidthHistory.add(line);
+ } else if (line.startsWith("dirreq-stats-end ")) {
+ String[] parts = line.split(" ");
+ if (parts.length < 5) {
+ this.logger.warning("Could not parse dirreq-stats-end "
+ + "line '" + line + "' in descriptor. Skipping.");
+ break;
+ }
+ statsEnd = parts[1] + " " + parts[2];
+ seconds = Long.parseLong(parts[3].substring(1));
+ } else if (line.startsWith("dirreq-v3-reqs ")
+ && line.length() > "dirreq-v3-reqs ".length()) {
+ if (this.rddi != null) {
+ try {
+ int allUsers = 0;
+ Map<String, String> obs = new HashMap<String, String>();
+ String[] parts = line.substring("dirreq-v3-reqs ".
+ length()).split(",");
+ for (String p : parts) {
+ String country = p.substring(0, 2);
+ int users = Integer.parseInt(p.substring(3)) - 4;
+ allUsers += users;
+ obs.put(country, "" + users);
+ }
+ obs.put("zy", "" + allUsers);
+ this.rddi.addDirReqStats(dir, statsEnd, seconds, obs);
+ } catch (NumberFormatException e) {
+ this.logger.log(Level.WARNING, "Could not parse "
+ + "dirreq-v3-reqs line '" + line + "' in descriptor. "
+ + "Skipping.", e);
+ break;
+ }
+ }
+ } else if (line.startsWith("conn-bi-direct ")) {
+ if (this.rddi != null) {
+ String[] parts = line.split(" ");
+ if (parts.length == 6 &&
+ parts[5].split(",").length == 4) {
+ try {
+ String connBiDirectStatsEnd = parts[1] + " " + parts[2];
+ long connBiDirectSeconds = Long.parseLong(parts[3].
+ substring(1));
+ String[] parts2 = parts[5].split(",");
+ long below = Long.parseLong(parts2[0]);
+ long read = Long.parseLong(parts2[1]);
+ long write = Long.parseLong(parts2[2]);
+ long both = Long.parseLong(parts2[3]);
+ this.rddi.addConnBiDirect(dir, connBiDirectStatsEnd,
+ connBiDirectSeconds, below, read, write, both);
+ } catch (NumberFormatException e) {
+ this.logger.log(Level.WARNING, "Number format "
+ + "exception while parsing conn-bi-direct stats "
+ + "string '" + line + "'. Skipping.", e);
+ }
+ } else {
+ this.logger.warning("Skipping invalid conn-bi-direct "
+ + "stats string '" + line + "'.");
+ }
+ }
+ }
+ }
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String digest = null;
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 || sig >= 0 || sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
+ }
+ if (this.rddi != null && digest != null) {
+ this.rddi.addExtraInfoDescriptor(digest, nickname,
+ dir.toLowerCase(), published, data, bandwidthHistory);
+ }
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ + "Skipping.", e);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ + "Skipping.", e);
+ }
+ }
}
diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java
index 57ad804..d631a7b 100644
--- a/src/org/torproject/ernie/cron/Main.java
+++ b/src/org/torproject/ernie/cron/Main.java
@@ -47,15 +47,10 @@ public class Main {
config.getWriteRelayDescriptorsRawFiles() ?
config.getRelayDescriptorRawFilesDirectory() : null) : null;
- // Prepare relay descriptor parser (only if we are writing the
- // consensus-health page to disk)
- RelayDescriptorParser rdp = rddi != null ?
- new RelayDescriptorParser(rddi, bsfh) : null;
-
// Import relay descriptors
- if (rdp != null) {
+ if (rddi != null) {
if (config.getImportDirectoryArchives()) {
- new ArchiveReader(rdp,
+ new ArchiveReader(rddi, bsfh,
new File(config.getDirectoryArchivesDirectory()),
statsDirectory,
config.getKeepDirectoryArchiveImportHistory());
diff --git a/src/org/torproject/ernie/cron/RelayDescriptorParser.java b/src/org/torproject/ernie/cron/RelayDescriptorParser.java
deleted file mode 100644
index ed52d4c..0000000
--- a/src/org/torproject/ernie/cron/RelayDescriptorParser.java
+++ /dev/null
@@ -1,387 +0,0 @@
-/* Copyright 2011, 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.cron;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-
-/**
- * Parses relay descriptors including network status consensuses and
- * votes, server and extra-info descriptors, and passes the results to the
- * stats handlers, to the archive writer, or to the relay descriptor
- * downloader.
- */
-public class RelayDescriptorParser {
-
- /**
- * Stats file handler that accepts parse results for bridge statistics.
- */
- private BridgeStatsFileHandler bsfh;
-
- /**
- * Relay descriptor database importer that stores relay descriptor
- * contents for later evaluation.
- */
- private RelayDescriptorDatabaseImporter rddi;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- private SimpleDateFormat dateTimeFormat;
-
- /**
- * Initializes this class.
- */
- public RelayDescriptorParser(RelayDescriptorDatabaseImporter rddi,
- BridgeStatsFileHandler bsfh) {
- this.rddi = rddi;
- this.bsfh = bsfh;
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
-
- this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
-
- public void parse(byte[] data) {
- try {
- /* Remove any @ lines at the beginning of the file and parse the
- * first non-@ line to find out the descriptor type. */
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = br.readLine();
- while (line != null && line.startsWith("@")) {
- line = br.readLine();
- }
- if (line == null) {
- this.logger.fine("We were given a file that doesn't contain a "
- + "single descriptor for parsing. Ignoring.");
- return;
- }
- br.close();
-
- /* Split the byte[] possibly containing multiple descriptors into
- * byte[]'s with one descriptor each and parse them. */
- String startToken = null;
- if (line.equals("network-status-version 3")) {
- startToken = "network-status-version 3";
- } else if (line.startsWith("router ")) {
- startToken = "router ";
- } else if (line.startsWith("extra-info ")) {
- startToken = "extra-info ";
- } else if (line.equals("dir-key-certificate-version 3")) {
- this.logger.fine("Not parsing dir key certificate.");
- return;
- } else {
- this.logger.warning("Unknown descriptor type. First line is '"
- + line + "'. Ignoring.");
- return;
- }
- String splitToken = "\n" + startToken;
- String ascii = new String(data, "US-ASCII");
- int length = data.length, start = ascii.indexOf(startToken);
- while (start < length) {
- int end = ascii.indexOf(splitToken, start);
- if (end < 0) {
- end = length;
- } else {
- end += 1;
- }
- byte[] descBytes = new byte[end - start];
- System.arraycopy(data, start, descBytes, 0, end - start);
- parseSingleDescriptor(descBytes);
- start = end;
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- }
- }
-
- private void parseSingleDescriptor(byte[] data) {
- try {
- /* Convert descriptor to ASCII for parsing. This means we'll lose
- * the non-ASCII chars, but we don't care about them for parsing
- * anyway. */
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = br.readLine();
- SimpleDateFormat parseFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (line.equals("network-status-version 3")) {
- // TODO when parsing the current consensus, check the fresh-until
- // time to see when we switch from hourly to half-hourly
- // consensuses
- boolean isConsensus = true;
- String validAfterTime = null, nickname = null,
- relayIdentity = null, serverDesc = null, version = null,
- ports = null;
- String dirSource = null, address = null;
- long validAfter = -1L, published = -1L, bandwidth = -1L,
- orPort = 0L, dirPort = 0L;
- SortedSet<String> relayFlags = null;
- StringBuilder rawStatusEntry = null;
- SortedSet<String> hashedRelayIdentities = new TreeSet<String>();
- while ((line = br.readLine()) != null) {
- if (line.equals("vote-status vote")) {
- isConsensus = false;
- } else if (line.startsWith("valid-after ")) {
- validAfterTime = line.substring("valid-after ".length());
- validAfter = parseFormat.parse(validAfterTime).getTime();
- } else if (line.startsWith("dir-source ")) {
- dirSource = line.split(" ")[2];
- } else if (line.startsWith("r ")) {
- if (isConsensus && relayIdentity != null &&
- this.rddi != null) {
- byte[] rawDescriptor = rawStatusEntry.toString().getBytes();
- this.rddi.addStatusEntry(validAfter, nickname,
- relayIdentity, serverDesc, published, address, orPort,
- dirPort, relayFlags, version, bandwidth, ports,
- rawDescriptor);
- relayFlags = null;
- version = null;
- bandwidth = -1L;
- ports = null;
- }
- rawStatusEntry = new StringBuilder(line + "\n");
- String[] parts = line.split(" ");
- if (parts.length < 9) {
- this.logger.log(Level.WARNING, "Could not parse r line '"
- + line + "' in descriptor. Skipping.");
- break;
- }
- nickname = parts[1];
- relayIdentity = Hex.encodeHexString(
- Base64.decodeBase64(parts[2] + "=")).
- toLowerCase();
- hashedRelayIdentities.add(DigestUtils.shaHex(
- Base64.decodeBase64(parts[2] + "=")).
- toUpperCase());
- serverDesc = Hex.encodeHexString(Base64.decodeBase64(
- parts[3] + "=")).toLowerCase();
- published = parseFormat.parse(parts[4] + " " + parts[5]).
- getTime();
- address = parts[6];
- orPort = Long.parseLong(parts[7]);
- dirPort = Long.parseLong(parts[8]);
- } else if (line.startsWith("s ") || line.equals("s")) {
- rawStatusEntry.append(line + "\n");
- relayFlags = new TreeSet<String>();
- if (line.length() > 2) {
- for (String flag : line.substring(2).split(" ")) {
- relayFlags.add(flag);
- }
- }
- } else if (line.startsWith("v ")) {
- rawStatusEntry.append(line + "\n");
- version = line.substring(2);
- } else if (line.startsWith("w ")) {
- rawStatusEntry.append(line + "\n");
- String[] parts = line.split(" ");
- for (String part : parts) {
- if (part.startsWith("Bandwidth=")) {
- bandwidth = Long.parseLong(part.substring(
- "Bandwidth=".length()));
- }
- }
- } else if (line.startsWith("p ")) {
- rawStatusEntry.append(line + "\n");
- ports = line.substring(2);
- }
- }
- if (isConsensus) {
- if (this.bsfh != null) {
- for (String hashedRelayIdentity : hashedRelayIdentities) {
- this.bsfh.addHashedRelay(hashedRelayIdentity);
- }
- }
- if (this.rddi != null) {
- this.rddi.addConsensus(validAfter, data);
- if (relayIdentity != null) {
- byte[] rawDescriptor = rawStatusEntry.toString().getBytes();
- this.rddi.addStatusEntry(validAfter, nickname,
- relayIdentity, serverDesc, published, address, orPort,
- dirPort, relayFlags, version, bandwidth, ports,
- rawDescriptor);
- }
- }
- } else {
- if (this.rddi != null) {
- this.rddi.addVote(validAfter, dirSource, data);
- }
- }
- } else if (line.startsWith("router ")) {
- String platformLine = null, bandwidthLine = null,
- extraInfoDigest = null, relayIdentifier = null;
- String[] parts = line.split(" ");
- String nickname = parts[1];
- String address = parts[2];
- int orPort = Integer.parseInt(parts[3]);
- int dirPort = Integer.parseInt(parts[4]);
- long published = -1L, uptime = -1L;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("platform ")) {
- platformLine = line;
- } else if (line.startsWith("published ")) {
- String publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- } else if (line.startsWith("opt fingerprint") ||
- line.startsWith("fingerprint")) {
- relayIdentifier = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- } else if (line.startsWith("bandwidth ")) {
- bandwidthLine = line;
- } else if (line.startsWith("opt extra-info-digest ") ||
- line.startsWith("extra-info-digest ")) {
- extraInfoDigest = line.startsWith("opt ") ?
- line.split(" ")[2].toLowerCase() :
- line.split(" ")[1].toLowerCase();
- } else if (line.startsWith("uptime ")) {
- uptime = Long.parseLong(line.substring("uptime ".length()));
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "router ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- String digest = null;
- if (start >= 0 || sig >= 0 || sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.rddi != null && digest != null) {
- String[] bwParts = bandwidthLine.split(" ");
- long bandwidthAvg = Long.parseLong(bwParts[1]);
- long bandwidthBurst = Long.parseLong(bwParts[2]);
- long bandwidthObserved = Long.parseLong(bwParts[3]);
- String platform = platformLine.substring("platform ".length());
- this.rddi.addServerDescriptor(digest, nickname, address, orPort,
- dirPort, relayIdentifier, bandwidthAvg, bandwidthBurst,
- bandwidthObserved, platform, published, uptime,
- extraInfoDigest, data);
- }
- } else if (line.startsWith("extra-info ")) {
- String nickname = line.split(" ")[1];
- long published = -1L;
- String dir = line.split(" ")[2];
- String statsEnd = null;
- long seconds = -1L;
- List<String> bandwidthHistory = new ArrayList<String>();
- while ((line = br.readLine()) != null) {
- if (line.startsWith("published ")) {
- String publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- } else if (line.startsWith("read-history ") ||
- line.startsWith("write-history ") ||
- line.startsWith("dirreq-read-history ") ||
- line.startsWith("dirreq-write-history ")) {
- bandwidthHistory.add(line);
- } else if (line.startsWith("dirreq-stats-end ")) {
- String[] parts = line.split(" ");
- if (parts.length < 5) {
- this.logger.warning("Could not parse dirreq-stats-end "
- + "line '" + line + "' in descriptor. Skipping.");
- break;
- }
- statsEnd = parts[1] + " " + parts[2];
- seconds = Long.parseLong(parts[3].substring(1));
- } else if (line.startsWith("dirreq-v3-reqs ")
- && line.length() > "dirreq-v3-reqs ".length()) {
- if (this.rddi != null) {
- try {
- int allUsers = 0;
- Map<String, String> obs = new HashMap<String, String>();
- String[] parts = line.substring("dirreq-v3-reqs ".
- length()).split(",");
- for (String p : parts) {
- String country = p.substring(0, 2);
- int users = Integer.parseInt(p.substring(3)) - 4;
- allUsers += users;
- obs.put(country, "" + users);
- }
- obs.put("zy", "" + allUsers);
- this.rddi.addDirReqStats(dir, statsEnd, seconds, obs);
- } catch (NumberFormatException e) {
- this.logger.log(Level.WARNING, "Could not parse "
- + "dirreq-v3-reqs line '" + line + "' in descriptor. "
- + "Skipping.", e);
- break;
- }
- }
- } else if (line.startsWith("conn-bi-direct ")) {
- if (this.rddi != null) {
- String[] parts = line.split(" ");
- if (parts.length == 6 &&
- parts[5].split(",").length == 4) {
- try {
- String connBiDirectStatsEnd = parts[1] + " " + parts[2];
- long connBiDirectSeconds = Long.parseLong(parts[3].
- substring(1));
- String[] parts2 = parts[5].split(",");
- long below = Long.parseLong(parts2[0]);
- long read = Long.parseLong(parts2[1]);
- long write = Long.parseLong(parts2[2]);
- long both = Long.parseLong(parts2[3]);
- this.rddi.addConnBiDirect(dir, connBiDirectStatsEnd,
- connBiDirectSeconds, below, read, write, both);
- } catch (NumberFormatException e) {
- this.logger.log(Level.WARNING, "Number format "
- + "exception while parsing conn-bi-direct stats "
- + "string '" + line + "'. Skipping.", e);
- }
- } else {
- this.logger.warning("Skipping invalid conn-bi-direct "
- + "stats string '" + line + "'.");
- }
- }
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String digest = null;
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 || sig >= 0 || sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.rddi != null && digest != null) {
- this.rddi.addExtraInfoDescriptor(digest, nickname,
- dir.toLowerCase(), published, data, bandwidthHistory);
- }
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- }
- }
-}
-
1
0

22 Mar '12
commit b4d34b4cb2e90584597d297cddde9aa228ae1c66
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Mar 21 12:30:49 2012 +0100
Remove bridge descriptor parser.
The purpose of this commit is to prepare adding metrics-lib as library for
descriptor parsing.
---
.../ernie/cron/BridgeDescriptorParser.java | 148 --------------------
src/org/torproject/ernie/cron/Main.java | 9 +-
.../ernie/cron/SanitizedBridgesReader.java | 145 ++++++++++++++++++-
.../ernie/test/SanitizedBridgesReaderTest.java | 33 -----
4 files changed, 143 insertions(+), 192 deletions(-)
diff --git a/src/org/torproject/ernie/cron/BridgeDescriptorParser.java b/src/org/torproject/ernie/cron/BridgeDescriptorParser.java
deleted file mode 100644
index 221e896..0000000
--- a/src/org/torproject/ernie/cron/BridgeDescriptorParser.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/* Copyright 2011, 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.cron;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TimeZone;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.digest.DigestUtils;
-
-public class BridgeDescriptorParser {
- private ConsensusStatsFileHandler csfh;
- private BridgeStatsFileHandler bsfh;
- private Logger logger;
- public BridgeDescriptorParser(ConsensusStatsFileHandler csfh,
- BridgeStatsFileHandler bsfh) {
- this.csfh = csfh;
- this.bsfh = bsfh;
- this.logger =
- Logger.getLogger(BridgeDescriptorParser.class.getName());
- }
- public void parse(byte[] allData, String dateTime, boolean sanitized) {
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- SimpleDateFormat timeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String hashedIdentity = null, platformLine = null,
- publishedLine = null, geoipStartTimeLine = null,
- bridgeStatsEndLine = null;
- boolean skip = false;
- String line = null;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("r ")) {
- int runningBridges = 0;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("s ") && line.contains(" Running")) {
- runningBridges++;
- }
- }
- if (this.csfh != null) {
- this.csfh.addBridgeConsensusResults(dateTime, runningBridges);
- }
- } else if (line.startsWith("router ")) {
- } else if (line.startsWith("extra-info ")) {
- hashedIdentity = sanitized ? line.split(" ")[2]
- : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase();
- if (this.bsfh != null) {
- skip = this.bsfh.isKnownRelay(hashedIdentity);
- }
- } else if (!skip && line.startsWith("platform ")) {
- platformLine = line;
- } else if (!skip && line.startsWith("published ")) {
- publishedLine = line;
- } else if (line.startsWith("opt fingerprint") ||
- line.startsWith("fingerprint")) {
- String identity = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- hashedIdentity = sanitized ? identity
- : DigestUtils.shaHex(identity).toUpperCase();
- } else if (!skip && line.startsWith("geoip-start-time ")) {
- geoipStartTimeLine = line;
- } else if (!skip && line.startsWith("geoip-client-origins")
- && line.split(" ").length > 1) {
- if (publishedLine == null ||
- geoipStartTimeLine == null) {
- this.logger.warning("Either published line or "
- + "geoip-start-time line is not present in "
- + (sanitized ? "sanitized" : "non-sanitized")
- + " bridge descriptors from " + dateTime + ".");
- break;
- }
- long published = timeFormat.parse(publishedLine.
- substring("published ".length())).getTime();
- long started = timeFormat.parse(geoipStartTimeLine.
- substring("geoip-start-time ".length())).getTime();
- long seconds = (published - started) / 1000L;
- double allUsers = 0.0D;
- Map<String, String> obs = new HashMap<String, String>();
- String[] parts = line.split(" ")[1].split(",");
- for (String p : parts) {
- String country = p.substring(0, 2);
- double users = ((double) Long.parseLong(p.substring(3)) - 4L)
- * 86400.0D / ((double) seconds);
- allUsers += users;
- obs.put(country, String.format("%.2f", users));
- }
- obs.put("zy", String.format("%.2f", allUsers));
- String date = publishedLine.split(" ")[1];
- String time = publishedLine.split(" ")[2];
- if (this.bsfh != null) {
- this.bsfh.addObs(hashedIdentity, date, time, obs);
- }
- } else if (!skip && line.startsWith("bridge-stats-end ")) {
- bridgeStatsEndLine = line;
- } else if (!skip && line.startsWith("bridge-ips")
- && line.split(" ").length > 1) {
- if (bridgeStatsEndLine == null) {
- this.logger.warning("bridge-ips line without preceding "
- + "bridge-stats-end line in "
- + (sanitized ? "sanitized" : "non-sanitized")
- + " bridge descriptor.");
- break;
- }
- double allUsers = 0.0D;
- Map<String, String> obs = new HashMap<String, String>();
- String[] parts = line.split(" ")[1].split(",");
- for (String p : parts) {
- String country = p.substring(0, 2);
- double users = (double) Long.parseLong(p.substring(3)) - 4L;
- allUsers += users;
- obs.put(country, String.format("%.2f", users));
- }
- obs.put("zy", String.format("%.2f", allUsers));
- String date = bridgeStatsEndLine.split(" ")[1];
- String time = bridgeStatsEndLine.split(" ")[2];
- if (this.bsfh != null) {
- this.bsfh.addObs(hashedIdentity, date, time, obs);
- }
- }
- }
- if (this.bsfh != null && platformLine != null &&
- platformLine.startsWith("platform Tor 0.2.2")) {
- String date = publishedLine.split(" ")[1];
- String time = publishedLine.split(" ")[2];
- this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time);
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
- e);
- return;
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
- e);
- return;
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java
index d631a7b..1c8ca48 100644
--- a/src/org/torproject/ernie/cron/Main.java
+++ b/src/org/torproject/ernie/cron/Main.java
@@ -68,13 +68,10 @@ public class Main {
new ConsensusStatsFileHandler(
config.getRelayDescriptorDatabaseJDBC()) : null;
- // Prepare bridge descriptor parser
- BridgeDescriptorParser bdp = config.getWriteBridgeStats() ?
- new BridgeDescriptorParser(csfh, bsfh) : null;
-
// Import bridge descriptors
- if (bdp != null && config.getImportSanitizedBridges()) {
- new SanitizedBridgesReader(bdp,
+ if (config.getWriteBridgeStats() &&
+ config.getImportSanitizedBridges()) {
+ new SanitizedBridgesReader(csfh, bsfh,
new File(config.getSanitizedBridgesDirectory()),
statsDirectory, config.getKeepSanitizedBridgesImportHistory());
}
diff --git a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
index 09b4983..78bd7db 100644
--- a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
+++ b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
@@ -11,24 +11,40 @@ import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.SortedSet;
import java.util.Stack;
+import java.util.TimeZone;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.apache.commons.codec.digest.DigestUtils;
+
public class SanitizedBridgesReader {
- public SanitizedBridgesReader(BridgeDescriptorParser bdp,
- File bridgesDir, File statsDirectory, boolean keepImportHistory) {
+ private ConsensusStatsFileHandler csfh;
+ private BridgeStatsFileHandler bsfh;
+ private Logger logger;
+ public SanitizedBridgesReader(ConsensusStatsFileHandler csfh,
+ BridgeStatsFileHandler bsfh, File bridgesDir, File statsDirectory,
+ boolean keepImportHistory) {
- if (bdp == null || bridgesDir == null || statsDirectory == null) {
+ if (csfh == null || bsfh == null || bridgesDir == null ||
+ statsDirectory == null) {
throw new IllegalArgumentException();
}
- Logger logger =
+ this.csfh = csfh;
+ this.bsfh = bsfh;
+ this.logger =
Logger.getLogger(SanitizedBridgesReader.class.getName());
+
SortedSet<String> bridgesImportHistory = new TreeSet<String>();
File bridgesImportHistoryFile =
new File(statsDirectory, "bridges-import-history");
@@ -79,7 +95,7 @@ public class SanitizedBridgesReader {
String dateTime = fn.substring(0, 4) + "-" + fn.substring(4, 6)
+ "-" + fn.substring(6, 8) + " " + fn.substring(9, 11)
+ ":" + fn.substring(11, 13) + ":" + fn.substring(13, 15);
- bdp.parse(allData, dateTime, true);
+ this.parse(allData, dateTime, true);
if (keepImportHistory) {
bridgesImportHistory.add(pop.getName());
}
@@ -123,5 +139,124 @@ public class SanitizedBridgesReader {
}
}
}
+
+ private void parse(byte[] allData, String dateTime, boolean sanitized) {
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(allData, "US-ASCII")));
+ SimpleDateFormat timeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String hashedIdentity = null, platformLine = null,
+ publishedLine = null, geoipStartTimeLine = null,
+ bridgeStatsEndLine = null;
+ boolean skip = false;
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("r ")) {
+ int runningBridges = 0;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("s ") && line.contains(" Running")) {
+ runningBridges++;
+ }
+ }
+ if (this.csfh != null) {
+ this.csfh.addBridgeConsensusResults(dateTime, runningBridges);
+ }
+ } else if (line.startsWith("router ")) {
+ } else if (line.startsWith("extra-info ")) {
+ hashedIdentity = sanitized ? line.split(" ")[2]
+ : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase();
+ if (this.bsfh != null) {
+ skip = this.bsfh.isKnownRelay(hashedIdentity);
+ }
+ } else if (!skip && line.startsWith("platform ")) {
+ platformLine = line;
+ } else if (!skip && line.startsWith("published ")) {
+ publishedLine = line;
+ } else if (line.startsWith("opt fingerprint") ||
+ line.startsWith("fingerprint")) {
+ String identity = line.substring(line.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ hashedIdentity = sanitized ? identity
+ : DigestUtils.shaHex(identity).toUpperCase();
+ } else if (!skip && line.startsWith("geoip-start-time ")) {
+ geoipStartTimeLine = line;
+ } else if (!skip && line.startsWith("geoip-client-origins")
+ && line.split(" ").length > 1) {
+ if (publishedLine == null ||
+ geoipStartTimeLine == null) {
+ this.logger.warning("Either published line or "
+ + "geoip-start-time line is not present in "
+ + (sanitized ? "sanitized" : "non-sanitized")
+ + " bridge descriptors from " + dateTime + ".");
+ break;
+ }
+ long published = timeFormat.parse(publishedLine.
+ substring("published ".length())).getTime();
+ long started = timeFormat.parse(geoipStartTimeLine.
+ substring("geoip-start-time ".length())).getTime();
+ long seconds = (published - started) / 1000L;
+ double allUsers = 0.0D;
+ Map<String, String> obs = new HashMap<String, String>();
+ String[] parts = line.split(" ")[1].split(",");
+ for (String p : parts) {
+ String country = p.substring(0, 2);
+ double users = ((double) Long.parseLong(p.substring(3)) - 4L)
+ * 86400.0D / ((double) seconds);
+ allUsers += users;
+ obs.put(country, String.format("%.2f", users));
+ }
+ obs.put("zy", String.format("%.2f", allUsers));
+ String date = publishedLine.split(" ")[1];
+ String time = publishedLine.split(" ")[2];
+ if (this.bsfh != null) {
+ this.bsfh.addObs(hashedIdentity, date, time, obs);
+ }
+ } else if (!skip && line.startsWith("bridge-stats-end ")) {
+ bridgeStatsEndLine = line;
+ } else if (!skip && line.startsWith("bridge-ips")
+ && line.split(" ").length > 1) {
+ if (bridgeStatsEndLine == null) {
+ this.logger.warning("bridge-ips line without preceding "
+ + "bridge-stats-end line in "
+ + (sanitized ? "sanitized" : "non-sanitized")
+ + " bridge descriptor.");
+ break;
+ }
+ double allUsers = 0.0D;
+ Map<String, String> obs = new HashMap<String, String>();
+ String[] parts = line.split(" ")[1].split(",");
+ for (String p : parts) {
+ String country = p.substring(0, 2);
+ double users = (double) Long.parseLong(p.substring(3)) - 4L;
+ allUsers += users;
+ obs.put(country, String.format("%.2f", users));
+ }
+ obs.put("zy", String.format("%.2f", allUsers));
+ String date = bridgeStatsEndLine.split(" ")[1];
+ String time = bridgeStatsEndLine.split(" ")[2];
+ if (this.bsfh != null) {
+ this.bsfh.addObs(hashedIdentity, date, time, obs);
+ }
+ }
+ }
+ if (this.bsfh != null && platformLine != null &&
+ platformLine.startsWith("platform Tor 0.2.2")) {
+ String date = publishedLine.split(" ")[1];
+ String time = publishedLine.split(" ")[2];
+ this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time);
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
+ e);
+ return;
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
+ e);
+ return;
+ }
+ }
}
diff --git a/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java b/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java
deleted file mode 100644
index 748894e..0000000
--- a/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Copyright 2011, 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.test;
-
-import java.io.File;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-import org.torproject.ernie.cron.SanitizedBridgesReader;
-
-public class SanitizedBridgesReaderTest {
-
- private File tempSanitizedBridgesDirectory;
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempSanitizedBridgesDirectory = folder.newFolder("bridges");
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testBridgeDescriptorParserNull() {
- new SanitizedBridgesReader(null, this.tempSanitizedBridgesDirectory,
- this.tempStatsDirectory, false);
- }
-}
-
1
0

[translation/vidalia_alpha] Update translations for vidalia_alpha
by translation@torproject.org 22 Mar '12
by translation@torproject.org 22 Mar '12
22 Mar '12
commit 412291270c78a1159e5ffb3ca8e56094d15ddc74
Author: Translation commit bot <translation(a)torproject.org>
Date: Thu Mar 22 10:45:13 2012 +0000
Update translations for vidalia_alpha
---
fa/vidalia_fa.po | 58 +++++++++++++++++++++++++++---------------------------
1 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/fa/vidalia_fa.po b/fa/vidalia_fa.po
index 0bac717..4526f87 100644
--- a/fa/vidalia_fa.po
+++ b/fa/vidalia_fa.po
@@ -8,7 +8,7 @@ msgstr ""
"Project-Id-Version: The Tor Project\n"
"Report-Msgid-Bugs-To: https://trac.torproject.org/projects/tor\n"
"POT-Creation-Date: 2012-03-21 17:46+0000\n"
-"PO-Revision-Date: 2012-03-22 10:15+0000\n"
+"PO-Revision-Date: 2012-03-22 10:31+0000\n"
"Last-Translator: ms2222 <slander_mo(a)hotmail.com>\n"
"Language-Team: translations(a)vidalia-project.net\n"
"MIME-Version: 1.0\n"
@@ -638,31 +638,31 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Chile"
-msgstr ""
+msgstr "شیلی"
msgctxt "CountryInfo"
msgid "China"
-msgstr ""
+msgstr "چین"
msgctxt "CountryInfo"
msgid "Colombia"
-msgstr ""
+msgstr "کلمبیا"
msgctxt "CountryInfo"
msgid "Comoros"
-msgstr ""
+msgstr "کامرون"
msgctxt "CountryInfo"
msgid "Congo, The Democratic Republic of the"
-msgstr ""
+msgstr "جمهوری دموکرات کنگو"
msgctxt "CountryInfo"
msgid "Congo"
-msgstr ""
+msgstr "کنگو"
msgctxt "CountryInfo"
msgid "Costa Rica"
-msgstr ""
+msgstr "کاستاریکا"
msgctxt "CountryInfo"
msgid "Cote d’Ivoire"
@@ -670,11 +670,11 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Croatia"
-msgstr ""
+msgstr "کرواسی"
msgctxt "CountryInfo"
msgid "Cuba"
-msgstr ""
+msgstr "کوبا"
msgctxt "CountryInfo"
msgid "Cyprus"
@@ -682,11 +682,11 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Czech Republic"
-msgstr ""
+msgstr "جمهوری چک"
msgctxt "CountryInfo"
msgid "Denmark"
-msgstr ""
+msgstr "دانمارک"
msgctxt "CountryInfo"
msgid "Djibouti"
@@ -698,19 +698,19 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Dominican Republic"
-msgstr ""
+msgstr "جمهوری دومنیکن"
msgctxt "CountryInfo"
msgid "Ecuador"
-msgstr ""
+msgstr "اکوادور"
msgctxt "CountryInfo"
msgid "Egypt"
-msgstr ""
+msgstr "مصر"
msgctxt "CountryInfo"
msgid "El Salvador"
-msgstr ""
+msgstr "السالوادور"
msgctxt "CountryInfo"
msgid "Equatorial Guinea"
@@ -722,15 +722,15 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Estonia"
-msgstr ""
+msgstr "استونی"
msgctxt "CountryInfo"
msgid "France"
-msgstr ""
+msgstr "فرانسه"
msgctxt "CountryInfo"
msgid "Gabon"
-msgstr ""
+msgstr "گابن"
msgctxt "CountryInfo"
msgid "Gambia"
@@ -934,15 +934,15 @@ msgstr "هلند"
msgctxt "CountryInfo"
msgid "New Zealand"
-msgstr ""
+msgstr "نیوزلند"
msgctxt "CountryInfo"
msgid "Nicaragua"
-msgstr ""
+msgstr "نیکاراگوئه"
msgctxt "CountryInfo"
msgid "Niger"
-msgstr ""
+msgstr "نیجر"
msgctxt "CountryInfo"
msgid "Nigeria"
@@ -950,15 +950,15 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Norway"
-msgstr ""
+msgstr "نروژ"
msgctxt "CountryInfo"
msgid "Oman"
-msgstr ""
+msgstr "عمان"
msgctxt "CountryInfo"
msgid "Pakistan"
-msgstr ""
+msgstr "پاکستان"
msgctxt "CountryInfo"
msgid "Palau"
@@ -966,11 +966,11 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Palestine"
-msgstr ""
+msgstr "فلسطین"
msgctxt "CountryInfo"
msgid "Panama"
-msgstr ""
+msgstr "پاناما"
msgctxt "CountryInfo"
msgid "Papua New Guinea"
@@ -978,11 +978,11 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Paraguay"
-msgstr ""
+msgstr "پاراگوئه"
msgctxt "CountryInfo"
msgid "Peru"
-msgstr ""
+msgstr "پرو"
msgctxt "CountryInfo"
msgid "Philippines"
1
0

[translation/vidalia_alpha] Update translations for vidalia_alpha
by translation@torproject.org 22 Mar '12
by translation@torproject.org 22 Mar '12
22 Mar '12
commit 98632c66b816b0cbfd1f72848c1afb41f2743b3d
Author: Translation commit bot <translation(a)torproject.org>
Date: Thu Mar 22 10:15:12 2012 +0000
Update translations for vidalia_alpha
---
fa/vidalia_fa.po | 11 ++++++-----
1 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/fa/vidalia_fa.po b/fa/vidalia_fa.po
index 35c8f18..0bac717 100644
--- a/fa/vidalia_fa.po
+++ b/fa/vidalia_fa.po
@@ -2,13 +2,14 @@
# Translators:
# Ardeshir <ardeshir(a)redteam.io>, 2012.
# <behravanhamed(a)gmail.com>, 2011.
+# <slander_mo(a)hotmail.com>, 2012.
msgid ""
msgstr ""
"Project-Id-Version: The Tor Project\n"
"Report-Msgid-Bugs-To: https://trac.torproject.org/projects/tor\n"
"POT-Creation-Date: 2012-03-21 17:46+0000\n"
-"PO-Revision-Date: 2012-03-22 09:01+0000\n"
-"Last-Translator: Ardeshir <ardeshir(a)redteam.io>\n"
+"PO-Revision-Date: 2012-03-22 10:15+0000\n"
+"Last-Translator: ms2222 <slander_mo(a)hotmail.com>\n"
"Language-Team: translations(a)vidalia-project.net\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
@@ -593,7 +594,7 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Brazil"
-msgstr ""
+msgstr "برزیل"
msgctxt "CountryInfo"
msgid "Brunei Darussalam"
@@ -617,11 +618,11 @@ msgstr ""
msgctxt "CountryInfo"
msgid "Cameroon"
-msgstr ""
+msgstr "کامرون"
msgctxt "CountryInfo"
msgid "Canada"
-msgstr ""
+msgstr "کانادا"
msgctxt "CountryInfo"
msgid "Cape Verde"
1
0