[tor-commits] [stem/master] Getting rid of the _get_document_content() helper

atagar at torproject.org atagar at torproject.org
Sat Oct 13 18:35:45 UTC 2012


commit e4185194801b6afd10671e6e7a1a53977a598c29
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Sep 22 13:18:16 2012 -0700

    Getting rid of the _get_document_content() helper
    
    With too many helper functions the code becomes fragmented. The
    _get_document_content() helper was only used in a couple of places, and both
    of those were actually better off with their own slightly different
    implementations.
---
 stem/descriptor/networkstatus.py |   90 +++++++++++++------------------------
 1 files changed, 32 insertions(+), 58 deletions(-)

diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index f279805..030d413 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -131,59 +131,28 @@ def parse_file(document_file, validate = True, is_microdescriptor = False):
     * IOError if the file can't be read
   """
   
-  header, authorities, footer, routers_end = _get_document_content(document_file, validate)
-  document_data = header + authorities + footer
+  # getting the document without the routers section
   
-  if not is_microdescriptor:
-    document = NetworkStatusDocument(document_data, validate)
-    router_type = RouterStatusEntry
-  else:
-    document = MicrodescriptorConsensus(document_data, validate)
-    router_type = RouterMicrodescriptor
-  
-  for desc in _get_routers(document_file, validate, document, routers_end, router_type):
-    yield desc
-
-def _get_document_content(document_file, validate):
-  """
-  Network status documents consist of four sections:
-  * header
-  * authority entries
-  * router entries
-  * footer
-  
-  This provides back a tuple with the following...
-  (header, authorities, footer, routers_end)
-  
-  This leaves the document_file at the start of the router entries.
-  
-  :param file document_file: file with network status document content
-  :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
-  
-  :returns: tuple with the network status document content and ending position of the routers
-  
-  :raises:
-    * ValueError if the contents is malformed and validate is True
-    * IOError if the file can't be read
-  """
-  
-  # parse until the first record of a following section
-  header = _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)
-  authorities = _read_until_keywords((ROUTERS_START, FOOTER_START), document_file)
+  header = _read_until_keywords((ROUTERS_START, FOOTER_START), document_file)
   
-  # skip router section, just taking note of the position
   routers_start = document_file.tell()
   _read_until_keywords(FOOTER_START, document_file, skip = True)
   routers_end = document_file.tell()
   
   footer = document_file.readlines()
+  document_content = header + footer
   
-  # leave our position at the start of the router section
-  document_file.seek(routers_start)
+  if not is_microdescriptor:
+    document = NetworkStatusDocument(document_content, validate)
+    router_type = RouterStatusEntry
+  else:
+    document = MicrodescriptorConsensus(document_content, validate)
+    router_type = RouterMicrodescriptor
   
-  return ("".join(header), "".join(authorities), "".join(footer), routers_end)
+  for desc in _get_routers(document_file, validate, document, routers_start, routers_end, router_type):
+    yield desc
 
-def _get_routers(document_file, validate, document, end_position, router_type):
+def _get_routers(document_file, validate, document, start_position, end_position, router_type):
   """
   Iterates over the router entries in a given document. The document_file is
   expected to be at the start of the router section and the end_position
@@ -192,7 +161,8 @@ def _get_routers(document_file, validate, document, end_position, router_type):
   :param file document_file: file with network status document content
   :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
   :param object document: document the descriptors originate from
-  :param int end_position: location in the document_file where the router section ends
+  :param int start_position: start of the routers section
+  :param int end_position: end of the routers section
   :param class router_type: router class to construct
   
   :returns: iterator over router_type instances
@@ -202,15 +172,16 @@ def _get_routers(document_file, validate, document, end_position, router_type):
     * IOError if the file can't be read
   """
   
+  document_file.seek(start_position)
   while document_file.tell() < end_position:
     desc_content = "".join(_read_until_keywords("r", document_file, ignore_first = True, end_position = end_position))
     yield router_type(desc_content, document, validate)
 
-def _get_authorities(authority_lines, is_vote, validate):
+def _get_authorities(authorities, is_vote, validate):
   """
   Iterates over the authoritiy entries in given content.
   
-  :param list authority_lines: lines of content to be parsed
+  :param str authorities: content of the authorities section
   :param bool is_vote: indicates if this is for a vote or contensus document
   :param bool validate: True if the document is to be validated, False otherwise
   
@@ -221,7 +192,7 @@ def _get_authorities(authority_lines, is_vote, validate):
   
   auth_buffer = []
   
-  for line in authority_lines:
+  for line in authorities.split("\n"):
     if not line: continue
     elif line.startswith(AUTH_START) and auth_buffer:
       yield DirectoryAuthority("\n".join(auth_buffer), is_vote, validate)
@@ -276,27 +247,30 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
     """
     
     super(NetworkStatusDocument, self).__init__(raw_content)
+    document_file = StringIO(raw_content)
     
-    self.directory_authorities = []
-    self._unrecognized_lines = []
+    header = _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)
+    self._header = _DocumentHeader("".join(header), validate, default_params)
     
-    document_file = StringIO(raw_content)
-    header_content, authority_content, footer_content, routers_end = _get_document_content(document_file, validate)
+    authorities = _read_until_keywords((ROUTERS_START, FOOTER_START), document_file)
+    self.directory_authorities = list(_get_authorities("".join(authorities), self._header.is_vote, validate))
     
-    self._header = _DocumentHeader(header_content, validate, default_params)
-    self._footer = _DocumentFooter(footer_content, validate, self._header)
-    self.directory_authorities = list(_get_authorities(authority_content.split("\n"), self._header.is_vote, validate))
+    routers_start = document_file.tell()
+    _read_until_keywords(FOOTER_START, document_file, skip = True)
+    routers_end = document_file.tell()
     
+    self._footer = _DocumentFooter(document_file.read(), validate, self._header)
+    
+    self._unrecognized_lines = []
+    
+    # copy the header and footer attributes into us
     for attr, value in vars(self._header).items() + vars(self._footer).items():
       if attr != "_unrecognized_lines":
         setattr(self, attr, value)
       else:
         self._unrecognized_lines += value
     
-    if document_file.tell() < routers_end:
-      self.routers = tuple(_get_routers(document_file, validate, self, routers_end, self._get_router_type()))
-    else:
-      self.routers = ()
+    self.routers = tuple(_get_routers(document_file, validate, self, routers_start, routers_end, self._get_router_type()))
   
   def _get_router_type(self):
     return RouterStatusEntry





More information about the tor-commits mailing list