[tor-commits] [metrics-lib/master] Add support for parsing exit lists.

karsten at torproject.org karsten at torproject.org
Thu Feb 2 07:36:35 UTC 2012


commit 5c1494966f2af90f4bdb024c3c561cccad23b24e
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Feb 2 08:11:15 2012 +0100

    Add support for parsing exit lists.
---
 src/org/torproject/descriptor/ExitList.java        |   16 ++
 src/org/torproject/descriptor/ExitListEntry.java   |   25 +++
 .../torproject/descriptor/impl/DescriptorImpl.java |    3 +
 .../descriptor/impl/ExitListEntryImpl.java         |  170 ++++++++++++++++++++
 .../torproject/descriptor/impl/ExitListImpl.java   |  121 ++++++++++++++
 5 files changed, 335 insertions(+), 0 deletions(-)

diff --git a/src/org/torproject/descriptor/ExitList.java b/src/org/torproject/descriptor/ExitList.java
new file mode 100644
index 0000000..5372b8b
--- /dev/null
+++ b/src/org/torproject/descriptor/ExitList.java
@@ -0,0 +1,16 @@
+/* Copyright 2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.descriptor;
+
+import java.util.Set;
+
+/* Exit list containing all known exit scan results at a given time.
+ * The single scan results are made available as ExitListEntry
+ * instances. */
+public interface ExitList extends Descriptor {
+
+  /* Return the publication time of the exit list in milliseconds since
+   * the epoch.  ExitListImpl does not read this time from the descriptor
+   * contents, but derives it from the file name, which it interprets as
+   * a UTC timestamp. */
+  public long getPublishedMillis();
+
+  /* Return the unordered set of exit scan results.  Callers must not
+   * rely on any particular iteration order. */
+  public Set<ExitListEntry> getExitListEntries();
+}
+
diff --git a/src/org/torproject/descriptor/ExitListEntry.java b/src/org/torproject/descriptor/ExitListEntry.java
new file mode 100644
index 0000000..74438de
--- /dev/null
+++ b/src/org/torproject/descriptor/ExitListEntry.java
@@ -0,0 +1,25 @@
+/* Copyright 2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.descriptor;
+
+/* Exit list entry containing results from a single exit scan.
+ * ExitListEntryImpl fills an entry from one "ExitNode", "Published",
+ * "LastStatus", and "ExitAddress" line each.  All methods ending in
+ * "Millis" presumably return milliseconds since the epoch, as their
+ * names suggest; NOTE(review): confirm the unit against ParseHelper,
+ * which does the actual timestamp parsing. */
+public interface ExitListEntry {
+
+  /* Return the scanned relay's fingerprint.  ExitListEntryImpl reads it
+   * from the "ExitNode" line. */
+  public String getFingerprint();
+
+  /* Return the publication time of the scanned relay's last known
+   * descriptor.  ExitListEntryImpl reads it from the "Published"
+   * line. */
+  public long getPublishedMillis();
+
+  /* Return the publication time of the network status that this scan was
+   * based on.  ExitListEntryImpl reads it from the "LastStatus" line. */
+  public long getLastStatusMillis();
+
+  /* Return the IP address that was determined in the scan.
+   * ExitListEntryImpl reads it from the "ExitAddress" line and parses it
+   * as an IPv4 address. */
+  public String getExitAddress();
+
+  /* Return the scan time.  ExitListEntryImpl reads it from the same
+   * "ExitAddress" line that contains the exit address. */
+  public long getScanMillis();
+}
+
diff --git a/src/org/torproject/descriptor/impl/DescriptorImpl.java b/src/org/torproject/descriptor/impl/DescriptorImpl.java
index 7db8aaf..e173612 100644
--- a/src/org/torproject/descriptor/impl/DescriptorImpl.java
+++ b/src/org/torproject/descriptor/impl/DescriptorImpl.java
@@ -63,6 +63,9 @@ public abstract class DescriptorImpl implements Descriptor {
           failUnrecognizedDescriptorLines));
     } else if (firstLines.startsWith("dir-key-certificate-version ")) {
       /* TODO Implement parsing of directory certificates. */
+    } else if (firstLines.startsWith("ExitNode ")) {
+      parsedDescriptors.add(new ExitListImpl(rawDescriptorBytes, fileName,
+          failUnrecognizedDescriptorLines));
     } else {
       throw new DescriptorParseException("Could not detect descriptor "
           + "type in descriptor starting with '" + firstLines + "'.");
diff --git a/src/org/torproject/descriptor/impl/ExitListEntryImpl.java b/src/org/torproject/descriptor/impl/ExitListEntryImpl.java
new file mode 100644
index 0000000..b3ed8f1
--- /dev/null
+++ b/src/org/torproject/descriptor/impl/ExitListEntryImpl.java
@@ -0,0 +1,170 @@
+/* Copyright 2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.descriptor.impl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.ExitListEntry;
+
+/* Exit list entry implementation that parses the "ExitNode",
+ * "Published", "LastStatus", and "ExitAddress" lines of a single exit
+ * scan result and requires each of these lines to occur exactly once. */
+public class ExitListEntryImpl implements ExitListEntry {
+
+  /* Raw bytes of this entry, stored and returned without copying. */
+  private final byte[] exitListEntryBytes;
+  public byte[] getExitListEntryBytes() {
+    return this.exitListEntryBytes;
+  }
+
+  /* Whether an unrecognized line is a parse error (true) or is merely
+   * remembered in unrecognizedLines (false). */
+  private final boolean failUnrecognizedDescriptorLines;
+
+  /* Unrecognized lines in the order they were found, or null if there
+   * are none or if they have been handed out already. */
+  private List<String> unrecognizedLines;
+
+  /* Hand out the unrecognized lines found during parsing, or null if
+   * there are none, and reset the internal reference to null. */
+  protected List<String> getAndClearUnrecognizedLines() {
+    List<String> lines = this.unrecognizedLines;
+    this.unrecognizedLines = null;
+    return lines;
+  }
+
+  /* Parse the given entry bytes.  Throws a DescriptorParseException if a
+   * recognized line is malformed, if one of the four expected keywords
+   * is contained more than once or not at all, or if an unrecognized
+   * line is found while failUnrecognizedDescriptorLines is true. */
+  protected ExitListEntryImpl(byte[] exitListEntryBytes,
+      boolean failUnrecognizedDescriptorLines)
+      throws DescriptorParseException {
+    this.exitListEntryBytes = exitListEntryBytes;
+    this.failUnrecognizedDescriptorLines =
+        failUnrecognizedDescriptorLines;
+    this.initializeKeywords();
+    this.parseExitListEntryBytes();
+    this.checkKeywords();
+  }
+
+  /* Keywords that have not been parsed yet, but that must be contained
+   * exactly once.  Keywords are removed as they are parsed, so whatever
+   * is left after parsing is missing. */
+  private SortedSet<String> exactlyOnceKeywords;
+  private void initializeKeywords() {
+    this.exactlyOnceKeywords = new TreeSet<String>();
+    this.exactlyOnceKeywords.add("ExitNode");
+    this.exactlyOnceKeywords.add("Published");
+    this.exactlyOnceKeywords.add("LastStatus");
+    this.exactlyOnceKeywords.add("ExitAddress");
+  }
+
+  /* Note that the given keyword was parsed, and fail if it is not among
+   * the keywords that are still expected, which is the case when it was
+   * parsed before. */
+  private void parsedExactlyOnceKeyword(String keyword)
+      throws DescriptorParseException {
+    if (!this.exactlyOnceKeywords.remove(keyword)) {
+      throw new DescriptorParseException("Duplicate '" + keyword
+          + "' line in exit list entry.");
+    }
+  }
+
+  /* Fail if at least one expected keyword was never parsed.  The
+   * exception names the alphabetically first missing keyword. */
+  private void checkKeywords() throws DescriptorParseException {
+    if (!this.exactlyOnceKeywords.isEmpty()) {
+      throw new DescriptorParseException("Missing '"
+          + this.exactlyOnceKeywords.first()
+          + "' line in exit list entry.");
+    }
+  }
+
+  /* Go through the entry line by line and hand each line to the parse
+   * method matching its keyword, which is the first space-separated
+   * part of the line. */
+  private void parseExitListEntryBytes()
+      throws DescriptorParseException {
+    try {
+      /* NOTE(review): The bytes are decoded using the platform default
+       * charset.  Consider an explicit charset if the rest of the
+       * library agrees. */
+      BufferedReader br = new BufferedReader(new StringReader(
+          new String(this.exitListEntryBytes)));
+      String line;
+      while ((line = br.readLine()) != null) {
+        String[] parts = line.split(" ");
+        /* Splitting a line that consists only of spaces results in an
+         * empty array.  Treat such a line like any other unrecognized
+         * line rather than failing with an array index error. */
+        String keyword = parts.length > 0 ? parts[0] : "";
+        if (keyword.equals("ExitNode")) {
+          this.parseExitNodeLine(line, parts);
+        } else if (keyword.equals("Published")) {
+          this.parsePublishedLine(line, parts);
+        } else if (keyword.equals("LastStatus")) {
+          this.parseLastStatusLine(line, parts);
+        } else if (keyword.equals("ExitAddress")) {
+          this.parseExitAddressLine(line, parts);
+        } else if (this.failUnrecognizedDescriptorLines) {
+          throw new DescriptorParseException("Unrecognized line '" + line
+              + "' in exit list entry.");
+        } else {
+          if (this.unrecognizedLines == null) {
+            this.unrecognizedLines = new ArrayList<String>();
+          }
+          this.unrecognizedLines.add(line);
+        }
+      }
+    } catch (IOException e) {
+      throw new RuntimeException("Internal error: Ran into an "
+          + "IOException while parsing a String in memory.  Something's "
+          + "really wrong.", e);
+    }
+  }
+
+  /* Fail if the line does not consist of exactly the expected number of
+   * space-separated parts, keyword included. */
+  private void checkNumberOfParts(String line, String[] parts,
+      int expectedParts) throws DescriptorParseException {
+    if (parts.length != expectedParts) {
+      throw new DescriptorParseException("Invalid line '" + line + "' in "
+          + "exit list entry.");
+    }
+  }
+
+  /* Parse an "ExitNode" line consisting of the keyword and the
+   * fingerprint. */
+  private void parseExitNodeLine(String line, String[] parts)
+      throws DescriptorParseException {
+    this.checkNumberOfParts(line, parts, 2);
+    this.parsedExactlyOnceKeyword(parts[0]);
+    this.fingerprint = ParseHelper.parseTwentyByteHexString(line,
+        parts[1]);
+  }
+
+  /* Parse a "Published" line consisting of the keyword and a timestamp
+   * that spans two parts. */
+  private void parsePublishedLine(String line, String[] parts)
+      throws DescriptorParseException {
+    this.checkNumberOfParts(line, parts, 3);
+    this.parsedExactlyOnceKeyword(parts[0]);
+    this.publishedMillis = ParseHelper.parseTimestampAtIndex(line, parts,
+        1, 2);
+  }
+
+  /* Parse a "LastStatus" line consisting of the keyword and a timestamp
+   * that spans two parts. */
+  private void parseLastStatusLine(String line, String[] parts)
+      throws DescriptorParseException {
+    this.checkNumberOfParts(line, parts, 3);
+    this.parsedExactlyOnceKeyword(parts[0]);
+    this.lastStatusMillis = ParseHelper.parseTimestampAtIndex(line, parts,
+        1, 2);
+  }
+
+  /* Parse an "ExitAddress" line consisting of the keyword, an IPv4
+   * address, and the scan timestamp that spans two parts. */
+  private void parseExitAddressLine(String line, String[] parts)
+      throws DescriptorParseException {
+    this.checkNumberOfParts(line, parts, 4);
+    this.parsedExactlyOnceKeyword(parts[0]);
+    this.exitAddress = ParseHelper.parseIpv4Address(line, parts[1]);
+    this.scanMillis = ParseHelper.parseTimestampAtIndex(line, parts,
+        2, 3);
+  }
+
+  /* Fingerprint parsed from the "ExitNode" line. */
+  private String fingerprint;
+  public String getFingerprint() {
+    return this.fingerprint;
+  }
+
+  /* Timestamp parsed from the "Published" line. */
+  private long publishedMillis;
+  public long getPublishedMillis() {
+    return this.publishedMillis;
+  }
+
+  /* Timestamp parsed from the "LastStatus" line. */
+  private long lastStatusMillis;
+  public long getLastStatusMillis() {
+    return this.lastStatusMillis;
+  }
+
+  /* Address parsed from the "ExitAddress" line. */
+  private String exitAddress;
+  public String getExitAddress() {
+    return this.exitAddress;
+  }
+
+  /* Timestamp parsed from the "ExitAddress" line. */
+  private long scanMillis;
+  public long getScanMillis() {
+    return this.scanMillis;
+  }
+}
+
diff --git a/src/org/torproject/descriptor/impl/ExitListImpl.java b/src/org/torproject/descriptor/impl/ExitListImpl.java
new file mode 100644
index 0000000..4861fa0
--- /dev/null
+++ b/src/org/torproject/descriptor/impl/ExitListImpl.java
@@ -0,0 +1,121 @@
+/* Copyright 2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.descriptor.impl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.TimeZone;
+
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExitListEntry;
+
+/* Exit list implementation that takes its publication time from the
+ * file name and splits the raw descriptor bytes into single exit list
+ * entries, one per "ExitAddress" line.
+ * TODO Add test class. */
+public class ExitListImpl extends DescriptorImpl implements ExitList {
+
+  /* Parse an exit list from the given raw descriptor bytes.  The file
+   * name is only used to determine the publication time.  Throws a
+   * DescriptorParseException if the file name does not have the expected
+   * format, if the descriptor is empty or contains empty lines, if an
+   * entry cannot be parsed, or if an unrecognized line is found while
+   * failUnrecognizedDescriptorLines is true. */
+  protected ExitListImpl(byte[] rawDescriptorBytes, String fileName,
+      boolean failUnrecognizedDescriptorLines)
+      throws DescriptorParseException {
+    super(rawDescriptorBytes, failUnrecognizedDescriptorLines);
+    this.setPublishedMillisFromFileName(fileName);
+    this.splitAndParseExitListEntries(rawDescriptorBytes);
+  }
+
+  /* Set the publication time from a file name that looks like
+   * "2012-02-01-04-06-24" and that is interpreted as UTC time.  A
+   * missing file name, a file name of different length, and a file name
+   * that cannot be parsed in its entirety are all rejected. */
+  private void setPublishedMillisFromFileName(String fileName)
+      throws DescriptorParseException {
+    /* Keep track of success explicitly rather than by looking for a
+     * non-zero publication time, because zero is a valid time. */
+    boolean parsedFileName = false;
+    if (fileName != null &&
+        fileName.length() == "2012-02-01-04-06-24".length()) {
+      SimpleDateFormat fileNameFormat = new SimpleDateFormat(
+          "yyyy-MM-dd-HH-mm-ss");
+      fileNameFormat.setLenient(false);
+      fileNameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      /* Parse with a position to be able to detect trailing characters
+       * that the parser silently ignores otherwise.  The two classes are
+       * fully qualified, because they are not among this file's
+       * imports. */
+      java.text.ParsePosition position = new java.text.ParsePosition(0);
+      java.util.Date published = fileNameFormat.parse(fileName,
+          position);
+      if (published != null &&
+          position.getIndex() == fileName.length()) {
+        this.publishedMillis = published.getTime();
+        parsedFileName = true;
+      }
+    }
+    if (!parsedFileName) {
+      throw new DescriptorParseException("Unrecognized exit list file "
+          + "name '" + fileName + "'.");
+    }
+  }
+
+  /* Go through the descriptor line by line and parse one exit list
+   * entry per "ExitAddress" line.  An entry consists of the most recent
+   * "ExitNode" line, the "Published" and "LastStatus" lines following
+   * it, and the "ExitAddress" line itself, so that an exit node with
+   * more than one "ExitAddress" line results in more than one entry. */
+  private void splitAndParseExitListEntries(byte[] rawDescriptorBytes)
+      throws DescriptorParseException {
+    if (this.rawDescriptorBytes.length == 0) {
+      throw new DescriptorParseException("Descriptor is empty.");
+    }
+    /* NOTE(review): Decoding here and encoding of single entries below
+     * both use the platform default charset.  Consider an explicit
+     * charset if the rest of the library agrees. */
+    String descriptorString = new String(rawDescriptorBytes);
+    if (descriptorString.startsWith("\n") ||
+        descriptorString.contains("\n\n")) {
+      throw new DescriptorParseException("Empty lines are not allowed.");
+    }
+    try {
+      BufferedReader br = new BufferedReader(new StringReader(
+          descriptorString));
+      String line;
+      /* Lines of the current exit node preceding its "ExitAddress"
+       * lines. */
+      StringBuilder entryHeader = new StringBuilder();
+      while ((line = br.readLine()) != null) {
+        String[] parts = line.split(" ");
+        /* Splitting a line that consists only of spaces results in an
+         * empty array.  Treat such a line like any other unrecognized
+         * line rather than failing with an array index error. */
+        String keyword = parts.length > 0 ? parts[0] : "";
+        if (keyword.equals("ExitNode")) {
+          entryHeader = new StringBuilder();
+          entryHeader.append(line).append("\n");
+        } else if (keyword.equals("Published") ||
+            keyword.equals("LastStatus")) {
+          entryHeader.append(line).append("\n");
+        } else if (keyword.equals("ExitAddress")) {
+          /* Leave the header untouched, so that it can be re-used for
+           * further "ExitAddress" lines of the same exit node. */
+          String exitListEntryString = entryHeader.toString() + line
+              + "\n";
+          byte[] exitListEntryBytes = exitListEntryString.getBytes();
+          this.parseExitListEntry(exitListEntryBytes);
+        } else if (this.failUnrecognizedDescriptorLines) {
+          throw new DescriptorParseException("Unrecognized line '" + line
+              + "' in exit list.");
+        } else {
+          if (this.unrecognizedLines == null) {
+            this.unrecognizedLines = new ArrayList<String>();
+          }
+          this.unrecognizedLines.add(line);
+        }
+      }
+    } catch (IOException e) {
+      throw new RuntimeException("Internal error: Ran into an "
+          + "IOException while parsing a String in memory.  Something's "
+          + "really wrong.", e);
+    }
+  }
+
+  /* Parse a single exit list entry from the given bytes, add it to the
+   * set of entries, and take over any lines that the entry did not
+   * recognize. */
+  protected void parseExitListEntry(byte[] exitListEntryBytes)
+      throws DescriptorParseException {
+    ExitListEntryImpl exitListEntry = new ExitListEntryImpl(
+        exitListEntryBytes, this.failUnrecognizedDescriptorLines);
+    this.exitListEntries.add(exitListEntry);
+    List<String> unrecognizedExitListEntryLines = exitListEntry.
+        getAndClearUnrecognizedLines();
+    if (unrecognizedExitListEntryLines != null) {
+      if (this.unrecognizedLines == null) {
+        this.unrecognizedLines = new ArrayList<String>();
+      }
+      this.unrecognizedLines.addAll(unrecognizedExitListEntryLines);
+    }
+  }
+
+  /* Publication time in milliseconds since the epoch as parsed from the
+   * file name. */
+  private long publishedMillis;
+  public long getPublishedMillis() {
+    return this.publishedMillis;
+  }
+
+  /* Entries parsed so far.  The getter returns a copy, so that callers
+   * cannot modify this set. */
+  private final Set<ExitListEntry> exitListEntries =
+      new HashSet<ExitListEntry>();
+  public Set<ExitListEntry> getExitListEntries() {
+    return new HashSet<ExitListEntry>(this.exitListEntries);
+  }
+}
+



More information about the tor-commits mailing list