[tor-commits] [stem/master] Support for bridge network status documents

atagar at torproject.org atagar at torproject.org
Sun Jan 13 05:06:45 UTC 2013


commit b236ac4e0ba830352c447537be6cf59d85650ae0
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Jan 12 21:04:01 2013 -0800

    Support for bridge network status documents
    
    Tor metrics has network status documents for bridges. These are not part of the
    dir-spec, and presently not even in the metrics spec. However, they're trivial
    to parse, consisting of just a 'published' line followed by v3 router status
    entries.
    
    This resolves...
    
    https://trac.torproject.org/7938
---
 run_tests.py                                   |    2 +
 stem/descriptor/__init__.py                    |   21 ++++++--
 stem/descriptor/networkstatus.py               |   61 ++++++++++++++++++++---
 test/integ/descriptor/networkstatus.py         |   27 ++++++++++-
 test/unit/descriptor/networkstatus/__init__.py |    2 +-
 5 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/run_tests.py b/run_tests.py
index 0edb785..db24c0e 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -25,6 +25,7 @@ import test.unit.connection.authentication
 import test.unit.control.controller
 import test.unit.descriptor.export
 import test.unit.descriptor.extrainfo_descriptor
+import test.unit.descriptor.networkstatus.bridge_document
 import test.unit.descriptor.networkstatus.directory_authority
 import test.unit.descriptor.networkstatus.document_v2
 import test.unit.descriptor.networkstatus.document_v3
@@ -129,6 +130,7 @@ UNIT_TESTS = (
   test.unit.descriptor.networkstatus.key_certificate.TestKeyCertificate,
   test.unit.descriptor.networkstatus.document_v2.TestNetworkStatusDocument,
   test.unit.descriptor.networkstatus.document_v3.TestNetworkStatusDocument,
+  test.unit.descriptor.networkstatus.bridge_document.TestBridgeNetworkStatusDocument,
   test.unit.exit_policy.rule.TestExitPolicyRule,
   test.unit.exit_policy.policy.TestExitPolicy,
   test.unit.version.TestVersion,
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 9a314ba..ca10823 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -131,14 +131,25 @@ def _parse_metrics_file(descriptor_type, major_version, minor_version, descripto
     # https://trac.torproject.org/6257
 
     yield stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor(descriptor_file.read())
-  elif descriptor_type in ("network-status-consensus-3", "network-status-vote-3") and major_version == 1:
-    for desc in stem.descriptor.networkstatus.parse_file(descriptor_file):
-      yield desc
   elif descriptor_type == "network-status-2" and major_version == 1:
-    for desc in stem.descriptor.networkstatus.parse_file(descriptor_file, document_version = 2):
+    document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV2
+
+    for desc in stem.descriptor.networkstatus.parse_file(descriptor_file, document_type):
+      yield desc
+  elif descriptor_type in ("network-status-consensus-3", "network-status-vote-3") and major_version == 1:
+    document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
+
+    for desc in stem.descriptor.networkstatus.parse_file(descriptor_file, document_type):
       yield desc
   elif descriptor_type == "network-status-microdesc-consensus-3" and major_version == 1:
-    for desc in stem.descriptor.networkstatus.parse_file(descriptor_file, is_microdescriptor = True):
+    document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
+
+    for desc in stem.descriptor.networkstatus.parse_file(descriptor_file, document_type, is_microdescriptor = True):
+      yield desc
+  elif descriptor_type == "bridge-network-status" and major_version == 1:
+    document_type = stem.descriptor.networkstatus.BridgeNetworkStatusDocument
+
+    for desc in stem.descriptor.networkstatus.parse_file(descriptor_file, document_type):
       yield desc
   else:
     raise TypeError("Unrecognized metrics descriptor format. type: '%s', version: '%i.%i'" % (descriptor_type, major_version, minor_version))
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index cef668a..a056917 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -166,18 +166,18 @@ BANDWIDTH_WEIGHT_ENTRIES = (
 )
 
 
-def parse_file(document_file, validate = True, is_microdescriptor = False, document_version = 3):
+def parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False):
   """
   Parses a network status and iterates over the RouterStatusEntry in it. The
   document that these instances reference have an empty 'routers' attribute to
   allow for limited memory usage.
 
   :param file document_file: file with network status document content
+  :param class document_type: NetworkStatusDocument subclass
   :param bool validate: checks the validity of the document's contents if
     **True**, skips these checks otherwise
   :param bool is_microdescriptor: **True** if this is for a microdescriptor
     consensus, **False** otherwise
-  :param int document_version: network status document version
 
   :returns: :class:`stem.descriptor.networkstatus.NetworkStatusDocument` object
 
@@ -187,6 +187,11 @@ def parse_file(document_file, validate = True, is_microdescriptor = False, docum
     * **IOError** if the file can't be read
   """
 
+  # we can't properly default this since NetworkStatusDocumentV3 isn't defined yet
+
+  if document_type is None:
+    document_type = NetworkStatusDocumentV3
+
   # getting the document without the routers section
 
   header = stem.descriptor._read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
@@ -198,18 +203,19 @@ def parse_file(document_file, validate = True, is_microdescriptor = False, docum
   footer = document_file.readlines()
   document_content = "".join(header + footer)
 
-  if document_version == 2:
+  if document_type == NetworkStatusDocumentV2:
     document_type = NetworkStatusDocumentV2
-    router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3
-  elif document_version == 3:
-    document_type = NetworkStatusDocumentV3
-
+    router_type = stem.descriptor.router_status_entry.RouterStatusEntryV2
+  elif document_type == NetworkStatusDocumentV3:
     if not is_microdescriptor:
       router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3
     else:
       router_type = stem.descriptor.router_status_entry.RouterStatusEntryMicroV3
+  elif document_type == BridgeNetworkStatusDocument:
+    document_type = BridgeNetworkStatusDocument
+    router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3
   else:
-    raise ValueError("Document version %i isn't recognized (only able to parse v2 or v3)" % document_version)
+    raise ValueError("Document type %i isn't recognized (only able to parse v2, v3, and bridge)" % document_type)
 
   desc_iterator = stem.descriptor.router_status_entry.parse_file(
     document_file,
@@ -1326,3 +1332,42 @@ class DocumentSignature(object):
         return -1
 
     return 0
+
+
+class BridgeNetworkStatusDocument(NetworkStatusDocument):
+  """
+  Network status document containing bridges. This is only available through
+  the metrics site.
+
+  :var tuple routers: :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
+    contained in the document
+  :var datetime published: time when the document was published
+  """
+
+  def __init__(self, raw_content, validate = True):
+    super(BridgeNetworkStatusDocument, self).__init__(raw_content)
+
+    self.routers = None
+    self.published = None
+
+    document_file = StringIO.StringIO(raw_content)
+
+    published_line = document_file.readline()
+
+    if published_line.startswith("published "):
+      published_line = published_line.split(" ", 1)[1].strip()
+
+      try:
+        self.published = datetime.datetime.strptime(published_line, "%Y-%m-%d %H:%M:%S")
+      except ValueError:
+        if validate:
+          raise ValueError("Bridge network status document's 'published' time wasn't parsable: %s" % published_line)
+    elif validate:
+      raise ValueError("Bridge network status documents must start with a 'published' line:\n%s" % raw_content)
+
+    self.routers = tuple(stem.descriptor.router_status_entry.parse_file(
+      document_file,
+      validate,
+      entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV3,
+      extra_args = (self,),
+    ))
diff --git a/test/integ/descriptor/networkstatus.py b/test/integ/descriptor/networkstatus.py
index 6627a48..c5a5d77 100644
--- a/test/integ/descriptor/networkstatus.py
+++ b/test/integ/descriptor/networkstatus.py
@@ -40,7 +40,9 @@ class TestNetworkStatus(unittest.TestCase):
 
     count = 0
     with open(consensus_path) as descriptor_file:
-      for router in stem.descriptor.networkstatus.parse_file(descriptor_file):
+      document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
+
+      for router in stem.descriptor.networkstatus.parse_file(descriptor_file, document_type):
         count += 1
 
         # We should have constant memory usage. Fail if we're using over 200 MB.
@@ -85,7 +87,9 @@ class TestNetworkStatus(unittest.TestCase):
 
     count = 0
     with open(consensus_path) as descriptor_file:
-      for router in stem.descriptor.networkstatus.parse_file(descriptor_file, is_microdescriptor = True):
+      document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
+
+      for router in stem.descriptor.networkstatus.parse_file(descriptor_file, document_type, is_microdescriptor = True):
         count += 1
 
         if resource.getrusage(resource.RUSAGE_SELF).ru_maxrss > 200000:
@@ -124,6 +128,25 @@ class TestNetworkStatus(unittest.TestCase):
       self.assertEquals(80, router.or_port)
       self.assertEquals(None, router.dir_port)
 
+  def test_metrics_bridge_consensus(self):
+    """
+    Checks if the bridge documents from Metrics are parsed properly.
+    """
+
+    consensus_path = test.integ.descriptor.get_resource("bridge_network_status")
+
+    with open(consensus_path) as descriptor_file:
+      descriptors = stem.descriptor.parse_file(consensus_path, descriptor_file)
+
+      router = next(descriptors)
+      self.assertEquals("Unnamed", router.nickname)
+      self.assertEquals("0014A2055278DB3EB0E59EA701741416AF185558", router.fingerprint)
+      self.assertEquals("FI74aFuNJZZQrgln0f+OaocMd0M", router.digest)
+      self.assertEquals(datetime.datetime(2012, 5, 31, 15, 57, 0), router.published)
+      self.assertEquals("10.97.236.247", router.address)
+      self.assertEquals(443, router.or_port)
+      self.assertEquals(None, router.dir_port)
+
   def test_consensus_v3(self):
     """
     Checks that version 3 consensus documents are properly parsed.
diff --git a/test/unit/descriptor/networkstatus/__init__.py b/test/unit/descriptor/networkstatus/__init__.py
index e1ffef4..bd68bfe 100644
--- a/test/unit/descriptor/networkstatus/__init__.py
+++ b/test/unit/descriptor/networkstatus/__init__.py
@@ -2,4 +2,4 @@
 Unit tests for stem.descriptor.networkstatus.
 """
 
-__all__ = ["directory_authority", "key_certificate", "document_v3"]
+__all__ = ["bridge_document", "directory_authority", "key_certificate", "document_v2", "document_v3"]



More information about the tor-commits mailing list