[tor-commits] [stem/master] Generalizing how router entries and authorities are parsed

atagar at torproject.org atagar at torproject.org
Sat Oct 13 18:35:45 UTC 2012


commit a5babce203784eb45f7c585edbcee4caf2362212
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Sep 22 14:12:21 2012 -0700

    Generalizing how router entries and authorities are parsed
    
    The _get_routers() and _get_authorities() were essentially doing the same
    thing. Replacing both with a more general _get_entries() helper that reads a
    range of the document and constructs instances for it.
    
    Taking advantage of this nicer helper's keyword arguments to make the code more
    readable (functions that take a ton of positional args are is a pita).
    
    I'm a bit surprised (and concerned) at how easily this passed unit tests. No
    doubt I've broken the integ tests but I'm not putting any effort there until
    I've finished the document parser rewrite.
---
 stem/descriptor/networkstatus.py               |  119 +++++++++++++-----------
 test/unit/descriptor/networkstatus/document.py |    4 +-
 test/unit/descriptor/networkstatus/entry.py    |   22 ++--
 3 files changed, 76 insertions(+), 69 deletions(-)

diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 030d413..8774b95 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -149,59 +149,58 @@ def parse_file(document_file, validate = True, is_microdescriptor = False):
     document = MicrodescriptorConsensus(document_content, validate)
     router_type = RouterMicrodescriptor
   
-  for desc in _get_routers(document_file, validate, document, routers_start, routers_end, router_type):
+  desc_iterator = _get_entries(
+    document_file,
+    validate,
+    entry_class = router_type,
+    entry_keyword = ROUTERS_START,
+    start_position = routers_start,
+    end_position = routers_end,
+    extra_args = (document,),
+  )
+  
+  for desc in desc_iterator:
     yield desc
 
-def _get_routers(document_file, validate, document, start_position, end_position, router_type):
+def _get_entries(document_file, validate, entry_class, entry_keyword, start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
   """
-  Iterates over the router entries in a given document. The document_file is
-  expected to be at the start of the router section and the end_position
-  desigates where that section ends.
+  Reads a range of the document_file containing some number of entry_class
+  instances. We deliminate the entry_class entries by the keyword on their
+  first line (entry_keyword). When finished the document is left at the
+  end_position.
+  
+  Either a end_position or section_end_keywords must be provided.
   
   :param file document_file: file with network status document content
   :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
-  :param object document: document the descriptors originate from
-  :param int start_position: start of the routers section
-  :param int end_position: end of the routers section
-  :param class router_type: router class to construct
+  :param class entry_class: class to construct instance for
+  :param str entry_keyword: first keyword for the entry instances
+  :param int start_position: start of the section, default is the current position
+  :param int end_position: end of the section
+  :param tuple section_end_keywords: keyword(s) that deliminate the end of the section if no end_position was provided
+  :param tuple extra_args: extra arguments for the entry_class (after the content and validate flag)
   
-  :returns: iterator over router_type instances
+  :returns: iterator over entry_class instances
   
   :raises:
     * ValueError if the contents is malformed and validate is True
     * IOError if the file can't be read
   """
   
-  document_file.seek(start_position)
-  while document_file.tell() < end_position:
-    desc_content = "".join(_read_until_keywords("r", document_file, ignore_first = True, end_position = end_position))
-    yield router_type(desc_content, document, validate)
-
-def _get_authorities(authorities, is_vote, validate):
-  """
-  Iterates over the authoritiy entries in given content.
-  
-  :param str authority_lines: content of the authorities section
-  :param bool is_vote: indicates if this is for a vote or contensus document
-  :param bool validate: True if the document is to be validated, False otherwise
-  
-  :returns: DirectoryAuthority entries represented by the content
-  
-  :raises: ValueError if the document is invalid
-  """
+  if start_position is None:
+    start_position = document_file.tell()
   
-  auth_buffer = []
-  
-  for line in authorities.split("\n"):
-    if not line: continue
-    elif line.startswith(AUTH_START) and auth_buffer:
-      yield DirectoryAuthority("\n".join(auth_buffer), is_vote, validate)
-      auth_buffer = []
-    
-    auth_buffer.append(line)
+  if end_position is None:
+    if section_end_keywords:
+      _read_until_keywords(section_end_keywords, document_file, skip = True)
+      end_position = document_file.tell()
+    else:
+      raise ValueError("Either a end_position or section_end_keywords must be provided")
   
-  if auth_buffer:
-    yield DirectoryAuthority("\n".join(auth_buffer), is_vote, validate)
+  document_file.seek(start_position)
+  while document_file.tell() < end_position:
+    desc_content = "".join(_read_until_keywords(entry_keyword, document_file, ignore_first = True, end_position = end_position))
+    yield router_type(desc_content, validate, *extra_args)
 
 class NetworkStatusDocument(stem.descriptor.Descriptor):
   """
@@ -249,18 +248,27 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
     super(NetworkStatusDocument, self).__init__(raw_content)
     document_file = StringIO(raw_content)
     
-    header = _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)
-    self._header = _DocumentHeader("".join(header), validate, default_params)
-    
-    authorities = _read_until_keywords((ROUTERS_START, FOOTER_START), document_file)
-    self.directory_authorities = list(_get_authorities("".join(authorities), self._header.is_vote, validate))
+    self._header = _DocumentHeader(document_file, validate, default_params)
     
-    routers_start = document_file.tell()
-    _read_until_keywords(FOOTER_START, document_file, skip = True)
-    routers_end = document_file.tell()
+    self.directory_authorities = tuple(_get_entries(
+      document_file,
+      validate,
+      entry_class = DirectoryAuthority,
+      entry_keyword = AUTH_START,
+      section_end_keywords = (ROUTERS_START, FOOTER_START),
+      extra_args = (self._header.is_vote,),
+    ))
     
-    self._footer = _DocumentFooter(document_file.read(), validate, self._header)
+    self.routers = tuple(_get_entries(
+      document_file,
+      validate,
+      entry_class = self._get_router_type(),
+      entry_keyword = ROUTERS_START,
+      section_end_keywords = FOOTER_START,
+      extra_args = (self,),
+    ))
     
+    self._footer = _DocumentFooter(document_file, validate, self._header)
     self._unrecognized_lines = []
     
     # copy the header and footer attributes into us
@@ -269,8 +277,6 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
         setattr(self, attr, value)
       else:
         self._unrecognized_lines += value
-    
-    self.routers = tuple(_get_routers(document_file, validate, self, routers_start, routers_end, self._get_router_type()))
   
   def _get_router_type(self):
     return RouterStatusEntry
@@ -292,7 +298,7 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
     return list(self._unrecognized_lines)
 
 class _DocumentHeader(object):
-  def __init__(self, content, validate, default_params):
+  def __init__(self, document_file, validate, default_params):
     self.version = None
     self.is_consensus = True
     self.is_vote = False
@@ -311,6 +317,7 @@ class _DocumentHeader(object):
     
     self._unrecognized_lines = []
     
+    content = "".join(_read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
     entries = stem.descriptor._get_descriptor_components(content, validate)[0]
     self._parse(entries, validate)
     
@@ -485,12 +492,12 @@ class _DocumentHeader(object):
         raise ValueError("'%s' value on the params line must be in the range of %i - %i, was %i" % (key, minimum, maximum, value))
 
 class _DocumentFooter(object):
-  def __init__(self, content, validate, header):
+  def __init__(self, document_file, validate, header):
     self.signatures = []
     self.bandwidth_weights = {}
-    
     self._unrecognized_lines = []
     
+    content = document_file.read()
     if validate and content and not header.meets_consensus_method(9):
       raise ValueError("Network status document's footer should only apepar in consensus-method 9 or later")
     elif not content and not header.meets_consensus_method(9):
@@ -655,7 +662,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor):
   | legacy_dir_key is the only optional attribute
   """
   
-  def __init__(self, raw_content, vote = True, validate = True):
+  def __init__(self, raw_content, validate, vote = True):
     """
     Parse a directory authority entry in a v3 network status document and
     provide a DirectoryAuthority object.
@@ -765,7 +772,7 @@ class RouterStatusEntry(stem.descriptor.Descriptor):
   **\*** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined
   """
   
-  def __init__(self, raw_contents, document, validate = True):
+  def __init__(self, raw_contents, validate = True, document = None):
     """
     Parse a router descriptor in a v3 network status document.
     
@@ -1045,7 +1052,7 @@ class RouterMicrodescriptor(RouterStatusEntry):
   | **\*** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined
   """
   
-  def __init__(self, raw_contents, document, validate = True):
+  def __init__(self, raw_contents, validate, document):
     """
     Parse a router descriptor in a v3 microdescriptor consensus and provide a new
     RouterMicrodescriptor object.
@@ -1057,7 +1064,7 @@ class RouterMicrodescriptor(RouterStatusEntry):
     :raises: ValueError if the descriptor data is invalid
     """
     
-    super(RouterMicrodescriptor, self).__init__(raw_contents, document, validate)
+    super(RouterMicrodescriptor, self).__init__(raw_contents, validate, document)
     
     self.document = document
   
diff --git a/test/unit/descriptor/networkstatus/document.py b/test/unit/descriptor/networkstatus/document.py
index a0fe3a8..ad79dde 100644
--- a/test/unit/descriptor/networkstatus/document.py
+++ b/test/unit/descriptor/networkstatus/document.py
@@ -119,7 +119,7 @@ class TestNetworkStatusDocument(unittest.TestCase):
     self.assertEqual([], document.server_versions)
     self.assertEqual(expected_known_flags, document.known_flags)
     self.assertEqual(DEFAULT_PARAMS, document.params)
-    self.assertEqual([], document.directory_authorities)
+    self.assertEqual((), document.directory_authorities)
     self.assertEqual({}, document.bandwidth_weights)
     self.assertEqual([SIG], document.signatures)
     self.assertEqual([], document.get_unrecognized_lines())
@@ -151,7 +151,7 @@ class TestNetworkStatusDocument(unittest.TestCase):
     self.assertEqual([], document.server_versions)
     self.assertEqual(expected_known_flags, document.known_flags)
     self.assertEqual(DEFAULT_PARAMS, document.params)
-    self.assertEqual([], document.directory_authorities)
+    self.assertEqual((), document.directory_authorities)
     self.assertEqual({}, document.bandwidth_weights)
     self.assertEqual([SIG], document.signatures)
     self.assertEqual([], document.get_unrecognized_lines())
diff --git a/test/unit/descriptor/networkstatus/entry.py b/test/unit/descriptor/networkstatus/entry.py
index 7c9e051..6380c62 100644
--- a/test/unit/descriptor/networkstatus/entry.py
+++ b/test/unit/descriptor/networkstatus/entry.py
@@ -73,7 +73,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     Parses a minimal router status entry.
     """
     
-    entry = RouterStatusEntry(get_router_status_entry(), None)
+    entry = RouterStatusEntry(get_router_status_entry())
     
     expected_flags = set([Flag.FAST, Flag.NAMED, Flag.RUNNING, Flag.STABLE, Flag.VALID])
     self.assertEqual(None, entry.document)
@@ -114,7 +114,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     """
     
     content = get_router_status_entry({'z': 'New tor feature: sparkly unicorns!'})
-    entry = RouterStatusEntry(content, None)
+    entry = RouterStatusEntry(content)
     self.assertEquals(['z New tor feature: sparkly unicorns!'], entry.get_unrecognized_lines())
   
   def test_proceeding_line(self):
@@ -131,7 +131,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     """
     
     content = get_router_status_entry() + "\n\nv Tor 0.2.2.35\n\n"
-    entry = RouterStatusEntry(content, None)
+    entry = RouterStatusEntry(content)
     self.assertEqual("Tor 0.2.2.35", entry.version_line)
   
   def test_missing_r_field(self):
@@ -293,7 +293,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     
     for s_line, expected in test_values.items():
       content = get_router_status_entry({'s': s_line})
-      entry = RouterStatusEntry(content, None)
+      entry = RouterStatusEntry(content)
       self.assertEquals(expected, entry.flags)
     
     # tries some invalid inputs
@@ -321,7 +321,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     
     for v_line, expected in test_values.items():
       content = get_router_status_entry({'v': v_line})
-      entry = RouterStatusEntry(content, None)
+      entry = RouterStatusEntry(content)
       self.assertEquals(expected, entry.version)
       self.assertEquals(v_line, entry.version_line)
     
@@ -343,7 +343,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     
     for w_line, expected in test_values.items():
       content = get_router_status_entry({'w': w_line})
-      entry = RouterStatusEntry(content, None)
+      entry = RouterStatusEntry(content)
       self.assertEquals(expected[0], entry.bandwidth)
       self.assertEquals(expected[1], entry.measured)
       self.assertEquals(expected[2], entry.unrecognized_bandwidth_entries)
@@ -378,7 +378,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     
     for p_line, expected in test_values.items():
       content = get_router_status_entry({'p': p_line})
-      entry = RouterStatusEntry(content, None)
+      entry = RouterStatusEntry(content)
       self.assertEquals(expected, entry.exit_policy)
     
     # tries some invalid inputs
@@ -414,7 +414,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     
     for m_line, expected in test_values.items():
       content = get_router_status_entry({'m': m_line})
-      entry = RouterStatusEntry(content, mock_document)
+      entry = RouterStatusEntry(content, document = mock_document)
       self.assertEquals(expected, entry.microdescriptor_hashes)
     
     # try without a document
@@ -430,7 +430,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     
     for m_line in test_values:
       content = get_router_status_entry({'m': m_line})
-      self.assertRaises(ValueError, RouterStatusEntry, content, mock_document)
+      self.assertRaises(ValueError, RouterStatusEntry, content, True, mock_document)
   
   def _expect_invalid_attr(self, content, attr = None, expected_value = None):
     """
@@ -439,8 +439,8 @@ class TestRouterStatusEntry(unittest.TestCase):
     value when we're constructed without validation.
     """
     
-    self.assertRaises(ValueError, RouterStatusEntry, content, None)
-    entry = RouterStatusEntry(content, None, False)
+    self.assertRaises(ValueError, RouterStatusEntry, content)
+    entry = RouterStatusEntry(content, False)
     
     if attr:
       self.assertEquals(expected_value, getattr(entry, attr))





More information about the tor-commits mailing list