[tor-commits] [stem/master] Add microdescriptor parsing

atagar at torproject.org atagar at torproject.org
Sat Oct 13 18:35:44 UTC 2012


commit 8ad310114b1ea7b743a868a8b70832eea5b8f3e2
Author: Ravi Chandra Padmala <neenaoffline at gmail.com>
Date:   Fri Aug 10 17:49:23 2012 +0530

    Add microdescriptor parsing
---
 run_tests.py                           |    1 +
 stem/descriptor/__init__.py            |    4 +
 stem/descriptor/networkstatus.py       |  218 ++++++++++++++++++++++++++++++--
 test/integ/descriptor/networkstatus.py |   42 ++++++
 4 files changed, 254 insertions(+), 11 deletions(-)

diff --git a/run_tests.py b/run_tests.py
index 8d115f1..b0550d6 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -136,6 +136,7 @@ INTEG_TESTS = (
   test.integ.descriptor.server_descriptor.TestServerDescriptor,
   test.integ.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor,
   test.integ.descriptor.networkstatus.TestNetworkStatusDocument,
+  test.integ.descriptor.networkstatus.TestMicrodescriptorConsensus,
   test.integ.version.TestVersion,
   test.integ.response.protocolinfo.TestProtocolInfo,
   test.integ.process.TestProcess,
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 40f03ad..d9ac21b 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -66,6 +66,8 @@ def parse_file(path, descriptor_file):
     file_parser = stem.descriptor.extrainfo_descriptor.parse_file
   elif filename == "cached-consensus":
     file_parser = stem.descriptor.networkstatus.parse_file
+  elif filename == "cached-microdesc-consensus":
+    file_parser = lambda f: stem.descriptor.networkstatus.parse_file(f, True, "microdesc")
   
   if file_parser:
     for desc in file_parser(descriptor_file):
@@ -103,6 +105,8 @@ def parse_file(path, descriptor_file):
         desc._set_path(path)
         yield desc
       return
+    elif desc_type == "network-status-microdesc-consensus-3" and major_version == 1:
+      desc = stem.descriptor.networkstatus.MicrodescriptorConsensus(descriptor_file.read())
   
   if desc:
     desc._set_path(path)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 214a33c..7effc7e 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -59,9 +59,30 @@ _bandwidth_weights_regex = re.compile(" ".join(["W%s=\d+" % weight for weight in
 
 _router_desc_end_kws = ["r", "bandwidth-weights", "directory-footer", "directory-signature"]
 
+Flavour = stem.util.enum.Enum(
+  ("NONE", ""),
+  ("NS", "ns"),
+  ("MICRODESCRIPTOR", "microdesc"),
+  )
+
+Flag = stem.util.enum.Enum(
+  ("AUTHORITY", "Authority"),
+  ("BADEXIT", "BadExit"),
+  ("EXIT", "Exit"),
+  ("FAST", "Fast"),
+  ("GUARD", "Guard"),
+  ("HSDIR", "HSDir"),
+  ("NAMED", "Named"),
+  ("RUNNING", "Running"),
+  ("STABLE", "Stable"),
+  ("UNNAMED", "Unnamed"),
+  ("V2DIR", "V2Dir"),
+  ("VALID", "Valid"),
+  )
+
 Flag = stem.util.enum.Enum(*[(flag.upper(), flag) for flag in ["Authority", "BadExit", "Exit", "Fast", "Guard", "HSDir", "Named", "Running", "Stable", "Unnamed", "V2Dir", "Valid"]])
 
-def parse_file(document_file, validate = True):
+def parse_file(document_file, validate = True, flavour = Flavour.NONE):
   """
   Iterates over the router descriptors in a network status document.
   
@@ -83,15 +104,27 @@ def parse_file(document_file, validate = True):
   _skip_until_keywords(["bandwidth-weights", "directory-footer", "directory-signature"], document_file)
   # parse until end
   document_data = document_data + document_file.read()
-  document = NetworkStatusDocument(document_data, validate)
-  document_file.seek(r_offset)
-  document.router_descriptors = _router_desc_generator(document_file, document.vote_status == "vote", validate, document.known_flags)
-  return document.router_descriptors
+  
+  if flavour == Flavour.NONE:
+    document = NetworkStatusDocument(document_data, validate)
+    document_file.seek(r_offset)
+    document.router_descriptors = _ns_router_desc_generator(document_file, document.vote_status == "vote", validate)
+    yield document
+  elif flavour == Flavour.MICRODESCRIPTOR:
+    document = MicrodescriptorConsensus(document_data, validate)
+    document_file.seek(r_offset)
+    document.router_descriptors = _router_microdesc_generator(document_file, validate, document.known_flags)
+    yield document
+
+def _ns_router_desc_generator(document_file, vote, validate):
+  while _peek_keyword(document_file) == "r":
+    desc_content = "".join(_read_until_keywords(_router_desc_end_kws, document_file, False, True))
+    yield RouterDescriptor(desc_content, vote, validate)
 
-def _router_desc_generator(document_file, vote, validate, known_flags):
+def _router_microdesc_generator(document_file, validate, known_flags):
   while _peek_keyword(document_file) == "r":
     desc_content = "".join(_read_until_keywords(_router_desc_end_kws, document_file, False, True))
-    yield RouterDescriptor(desc_content, vote, validate, known_flags)
+    yield RouterMicrodescriptor(desc_content, validate, known_flags)
 
 class NetworkStatusDocument(stem.descriptor.Descriptor):
   """
@@ -159,8 +192,10 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
     
     self._parse(raw_content)
   
-  def _generate_router(self, raw_content, vote, validate, known_flags):
-    return RouterDescriptor(raw_content, vote, validate, known_flags)
+  def _router_desc_generator(self, document_file):
+    while _peek_keyword(document_file) == "r":
+      desc_content = "".join(_read_until_keywords(_router_desc_end_kws, document_file, False, True))
+      yield RouterDescriptor(desc_content, self.vote_status == "vote", self.validated, self.known_flags)
   
   def _validate_network_status_version(self):
     return self.network_status_version == "3"
@@ -223,7 +258,7 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
     # router descriptors
     if _peek_keyword(content) == "r":
       router_descriptors_data = "".join(_read_until_keywords(["bandwidth-weights", "directory-footer", "directory-signature"], content, False, True))
-      self.router_descriptors = _router_desc_generator(StringIO(router_descriptors_data), vote, validate, self.known_flags)
+      self.router_descriptors = self._router_desc_generator(StringIO(router_descriptors_data))
     
     # footer section
     if self.consensus_method > 9 or vote and filter(lambda x: x >= 9, self.consensus_methods):
@@ -394,7 +429,7 @@ class RouterDescriptor(stem.descriptor.Descriptor):
     :param bool vote: True if the descriptor is from a vote document
     :param bool validate: whether the router descriptor should be validated
     :param bool known_flags: list of known router status flags
-
+    
     :raises: ValueError if the descriptor data is invalid
     """
     
@@ -524,3 +559,164 @@ class RouterDescriptor(stem.descriptor.Descriptor):
     
     return self.unrecognized_lines
 
+class MicrodescriptorConsensus(NetworkStatusDocument):
+  """
+  A v3 microdescriptor consensus.
+  
+  :var bool validated: **\*** whether the document is validated
+  :var str network_status_version: **\*** a document format version. For v3 microdescriptor consensuses this is "3 microdesc"
+  :var str vote_status: **\*** status of the vote (is "consensus")
+  :var int consensus_method: **~** consensus method used to generate a consensus
+  :var datetime valid_after: **\*** time when the consensus becomes valid
+  :var datetime fresh_until: **\*** time until when the consensus is considered to be fresh
+  :var datetime valid_until: **\*** time until when the consensus is valid
+  :var int vote_delay: **\*** number of seconds allowed for collecting votes from all authorities
+  :var int dist_delay: number of seconds allowed for collecting signatures from all authorities
+  :var list client_versions: list of recommended Tor client versions
+  :var list server_versions: list of recommended Tor server versions
+  :var list known_flags: **\*** list of known router flags
+  :var dict params: dict of parameter(str) => value(int) mappings
+  :var list router_descriptors: **\*** iterator for RouterMicrodescriptor objects defined in the document
+  :var list directory_authorities: **\*** list of DirectoryAuthority objects that have generated this document
+  :var dict bandwidth_weights: **~** dict of weight(str) => value(int) mappings
+  :var list directory_signatures: **\*** list of signatures this document has
+  
+  | **\*** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined
+  | **~** attribute appears only in consensuses
+  """
+  
+  def _router_desc_generator(self, document_file):
+    while _peek_keyword(document_file) == "r":
+      desc_content = "".join(_read_until_keywords(_router_desc_end_kws, document_file, False, True))
+      yield RouterMicrodescriptor(desc_content, self.validated, self.known_flags)
+  
+  def _validate_network_status_version(self):
+    return self.network_status_version == "3 microdesc"
+
+class RouterMicrodescriptor(RouterDescriptor):
+  """
+  Router microdescriptor object. Parses and stores router information in a router
+  microdescriptor from a v3 microdescriptor consensus.
+  
+  :var str nickname: **\*** router's nickname
+  :var str identity: **\*** router's identity
+  :var datetime publication: **\*** router's publication
+  :var str ip: **\*** router's IP address
+  :var int orport: **\*** router's ORPort
+  :var int dirport: **\*** router's DirPort
+  
+  :var list flags: **\*** list of status flags
+  :var list unknown_flags: **\*** list of unidentified status flags
+  
+  :var :class:`stem.version.Version`,str version: Version of the Tor protocol this router is running
+  
+  :var int bandwidth: router's claimed bandwidth
+  :var int measured_bandwidth: router's measured bandwidth
+  
+  :var str digest: base64 of the hash of the router's microdescriptor with trailing =s omitted
+  
+  | **\*** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined
+  """
+  
+  def __init__(self, raw_contents, validate = True, known_flags = Flag):
+    """
+    Parse a router descriptor in a v3 microdescriptor consensus and provide a new
+    RouterMicrodescriptor object.
+    
+    :param str raw_contents: router descriptor content to be parsed
+    :param bool validate: whether the router descriptor should be validated
+    :param list known_flags: list of known router status flags
+    
+    :raises: ValueError if the descriptor data is invalid
+    """
+    
+    super(RouterMicrodescriptor, self).__init__(raw_contents, False, validate, known_flags)
+  
+  def _parse(self, raw_content, _, validate, known_flags):
+    """
+    :param str raw_content: router descriptor contents to be parsed
+    :param bool validate: checks the validity of descriptor content if True
+    :param list known_flags: list of known router status flags
+    
+    :raises: ValueError if an error occurs in validation
+    """
+    
+    content = StringIO(raw_content)
+    seen_keywords = set()
+    peek_check_kw = lambda keyword: keyword == _peek_keyword(content)
+    
+    r = _read_keyword_line("r", content, validate)
+    # r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw 2012-06-22 13:19:32 80.101.105.103 9001 0
+    # "r" SP nickname SP identity SP publication SP IP SP ORPort SP DirPort NL (no digest here; it comes from the "m" line)
+    if r:
+      seen_keywords.add("r")
+      values = r.split(" ")
+      self.nickname, self.identity = values[0], values[1]
+      self.publication = _strptime(" ".join((values[2], values[3])), validate)
+      self.ip, self.orport, self.dirport = values[4], int(values[5]), int(values[6])
+      if self.dirport == 0: self.dirport = None
+    elif validate: raise ValueError("Invalid router descriptor: empty 'r' line")
+    
+    while _peek_line(content):
+      if peek_check_kw("s"):
+        if "s" in seen_keywords: raise ValueError("Invalid router descriptor: 's' line appears twice")
+        line = _read_keyword_line("s", content, validate)
+        if not line: continue
+        seen_keywords.add("s")
+        # s Named Running Stable Valid
+        #A series of space-separated status flags, in *lexical order*
+        self.flags = line.split(" ")
+        
+        self.unknown_flags = filter(lambda f: not f in known_flags, self.flags)
+        if validate and self.unknown_flags:
+          raise ValueError("Router contained unknown flags: %s", " ".join(self.unknown_flags))
+      
+      elif peek_check_kw("v"):
+        if "v" in seen_keywords: raise ValueError("Invalid router descriptor: 'v' line appears twice")
+        line = _read_keyword_line("v", content, validate, True)
+        seen_keywords.add("v")
+        # v Tor 0.2.2.35
+        if line:
+          if line.startswith("Tor "):
+            self.version = stem.version.Version(line[4:])
+          else:
+            self.version = line
+        elif validate: raise ValueError("Invalid router descriptor: empty 'v' line" )
+      
+      elif peek_check_kw("w"):
+        if "w" in seen_keywords: raise ValueError("Invalid router descriptor: 'w' line appears twice")
+        w = _read_keyword_line("w", content, validate, True)
+        # "w" SP "Bandwidth=" INT [SP "Measured=" INT] NL
+        seen_keywords.add("w")
+        if w:
+          values = w.split(" ")
+          if len(values) <= 2 and len(values) > 0:
+            key, value = values[0].split("=")
+            if key == "Bandwidth": self.bandwidth = int(value)
+            elif validate: raise ValueError("Router descriptor contains invalid 'w' line: expected Bandwidth, read " + key)
+            
+            if len(values) == 2:
+              key, value = values[1].split("=")
+              if key == "Measured": self.measured_bandwidth = int(value)
+              elif validate: raise ValueError("Router descriptor contains invalid 'w' line: expected Measured, read " + key)
+          elif validate: raise ValueError("Router descriptor contains invalid 'w' line")
+        elif validate: raise ValueError("Router descriptor contains empty 'w' line")
+      
+      elif peek_check_kw("m"):
+        # microdescriptor hashes
+        self.digest = _read_keyword_line("m", content, validate, True)
+      
+      elif validate:
+        raise ValueError("Router descriptor contains unrecognized trailing lines: %s" % content.readline())
+      
+      else:
+        self.unrecognized_lines.append(content.readline()) # ignore unrecognized lines if we aren't validating
+  
+  def get_unrecognized_lines(self):
+    """
+    Returns any unrecognized lines.
+    
+    :returns: a list of unrecognized lines
+    """
+    
+    return self.unrecognized_lines
diff --git a/test/integ/descriptor/networkstatus.py b/test/integ/descriptor/networkstatus.py
index 07414c3..484e67d 100644
--- a/test/integ/descriptor/networkstatus.py
+++ b/test/integ/descriptor/networkstatus.py
@@ -13,6 +13,7 @@ import stem.exit_policy
 import stem.version
 import stem.descriptor.networkstatus
 import test.integ.descriptor
+from stem.descriptor.networkstatus import Flavour
 
 def _strptime(string):
   return datetime.datetime.strptime(string, "%Y-%m-%d %H:%M:%S")
@@ -256,3 +257,44 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
     self.assertEquals("D5C30C15BB3F1DA27669C2D88439939E8F418FCF", desc.directory_signatures[0].key_digest)
     self.assertEquals(expected_signature, desc.directory_signatures[0].signature)
 
+class TestMicrodescriptorConsensus(unittest.TestCase):
+  def test_cached_microdesc_consensus(self):
+    """
+    Parses the cached-microdesc-consensus file in our data directory.
+    """
+    
+    # lengthy test and unaffected by targets, so only run once
+    if test.runner.only_run_once(self, "test_cached_microdesc_consensus"): return
+    
+    descriptor_path = test.runner.get_runner().get_test_dir("cached-microdesc-consensus")
+    
+    if not os.path.exists(descriptor_path):
+      test.runner.skip(self, "(no cached-microdesc-consensus)")
+    
+    count = 0
+    with open(descriptor_path) as descriptor_file:
+      for desc in next(stem.descriptor.networkstatus.parse_file(descriptor_file, True, flavour = Flavour.MICRODESCRIPTOR)).router_descriptors:
+        assert desc.nickname # check that the router has a nickname
+        count += 1
+    
+    assert count > 100 # sanity check - assuming at least 100 relays in the consensus
+  
+  def test_metrics_microdesc_consensus(self):
+    """
+    Checks if consensus documents from Metrics are parsed properly.
+    """
+    
+    descriptor_path = test.integ.descriptor.get_resource("metrics_microdesc_consensus")
+    
+    with file(descriptor_path) as descriptor_file:
+      desc = stem.descriptor.parse_file(descriptor_path, descriptor_file)
+      
+      router = next(next(desc).router_descriptors)
+      self.assertEquals("JapanAnon", router.nickname)
+      self.assertEquals("AGw/p8P246zRPQ3ZsQx9+pM8I3s", router.identity)
+      self.assertEquals("9LDw0XiFeLQDXK9t8ht4+MK9tWx6Jxp1RwP36eatRWs", router.digest)
+      self.assertEquals(_strptime("2012-07-18 15:55:42"), router.publication)
+      self.assertEquals("220.0.231.71", router.ip)
+      self.assertEquals(443, router.orport)
+      self.assertEquals(9030, router.dirport)
+





More information about the tor-commits mailing list