[tor-commits] [stem/master] Server descriptor parsing fixes

atagar at torproject.org atagar at torproject.org
Mon Mar 26 00:10:01 UTC 2012


commit 8dc2d66b7dde16e0709dc5c5ff1ac0c320397f22
Author: Damian Johnson <atagar at torproject.org>
Date:   Fri Mar 23 22:05:29 2012 -0700

    Server descriptor parsing fixes
    
    Resolving the variety of issues that were breaking the integ tests. We still
    don't have any tests to actually verify the server descriptor parsing, but
    at least we're now exercising it on some real data... and not dying in a fire!
    Progress!
---
 stem/descriptor/__init__.py          |    8 +++++---
 stem/descriptor/server_descriptor.py |   24 +++++++++++++-----------
 test/integ/descriptor/reader.py      |    4 ++--
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 4d44bc0..ad17296 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -43,14 +43,16 @@ def parse_descriptors(path, descriptor_file):
   
   if first_line.startswith("router "):
     # server descriptor
-    while descriptor_file:
-      yield stem.descriptor.server_descriptor.parse_server_descriptors_v2(path, descriptor_file)
+    for desc in stem.descriptor.server_descriptor.parse_server_descriptors_v2(path, descriptor_file):
+      yield desc
     
     return
   
   # TODO: implement actual descriptor type recognition and parsing
   # TODO: add integ test for non-descriptor text content
-  yield Descriptor(path, descriptor_file.read())
+  desc = Descriptor(descriptor_file.read())
+  desc._set_path(path)
+  yield desc
 
 class Descriptor:
   """
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index bd68f91..fe6889b 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -21,7 +21,7 @@ ENTRY_END   = "router-signature"
 
 KEYWORD_CHAR    = "a-zA-Z0-9-"
 WHITESPACE      = " \t"
-KEYWORD_LINE    = re.compile("^([%s]+)[%s]*([%s]*)$" % (KEYWORD_CHAR, WHITESPACE, KEYWORD_CHAR))
+KEYWORD_LINE    = re.compile("^([%s]+)[%s]*(.*)$" % (KEYWORD_CHAR, WHITESPACE))
 PGP_BLOCK_START = re.compile("^-----BEGIN ([%s%s]+)-----$" % (KEYWORD_CHAR, WHITESPACE))
 PGP_BLOCK_END   = "-----END %s-----"
 
@@ -74,7 +74,7 @@ def parse_server_descriptors_v2(path, descriptor_file):
   #     end of the signature block
   #   - construct a descriptor and provide it back to the caller
   
-  while descriptor_file:
+  while True:
     annotations = _read_until_keyword(ENTRY_START, descriptor_file)
     descriptor_content = _read_until_keyword(ENTRY_END, descriptor_file)
     
@@ -88,9 +88,10 @@ def parse_server_descriptors_v2(path, descriptor_file):
     # caller.
     
     if descriptor_content:
-      descriptor = ServerDescriptorV2(descriptor_content, annotations = annotations)
+      descriptor = ServerDescriptorV2("\n".join(descriptor_content), annotations = annotations)
       descriptor._set_path(path)
       yield descriptor
+    else: return # done parsing descriptors
 
 def _read_until_keyword(keyword, descriptor_file, inclusive = False):
   """
@@ -108,12 +109,14 @@ def _read_until_keyword(keyword, descriptor_file, inclusive = False):
   
   content = []
   
-  while descriptor_file:
+  while True:
     last_position = descriptor_file.tell()
     line = descriptor_file.readline()
     
-    if not line: continue # blank line
-    elif " " in line: line_keyword = line.split(" ", 1)[0]
+    if not line: break # EOF
+    line = line.strip()
+    
+    if " " in line: line_keyword = line.split(" ", 1)[0]
     else: line_keyword = line
     
     if line_keyword == keyword:
@@ -259,7 +262,7 @@ class ServerDescriptorV2(stem.descriptor.Descriptor):
       # ignored. This prefix is being removed in...
       # https://trac.torproject.org/projects/tor/ticket/5124
       
-      line = line.lstrip("opt ")
+      if line.startswith("opt "): line = line[4:]
       
       line_match = KEYWORD_LINE.match(line)
       
@@ -278,7 +281,6 @@ class ServerDescriptorV2(stem.descriptor.Descriptor):
         entries[keyword] = [(value, block_type, block_contents)]
     
     # validates restrictions about the entries
-    
     if validate:
       for keyword in REQUIRED_FIELDS:
         if not keyword in entries:
@@ -338,9 +340,9 @@ class ServerDescriptorV2(stem.descriptor.Descriptor):
           elif not bandwidth_comp[2].isdigit():
             raise ValueError("Bandwidth line's observed rate isn't numeric: %s" % bandwidth_comp[2])
         
-        self.average_bandwidth  = int(router_comp[0])
-        self.burst_bandwidth    = int(router_comp[1])
-        self.observed_bandwidth = int(router_comp[2])
+        self.average_bandwidth  = int(bandwidth_comp[0])
+        self.burst_bandwidth    = int(bandwidth_comp[1])
+        self.observed_bandwidth = int(bandwidth_comp[2])
       elif keyword == "platform":
         # "platform" string
         
diff --git a/test/integ/descriptor/reader.py b/test/integ/descriptor/reader.py
index cdc33c2..7f2a425 100644
--- a/test/integ/descriptor/reader.py
+++ b/test/integ/descriptor/reader.py
@@ -51,7 +51,7 @@ def _get_raw_tar_descriptors():
       for tar_entry in tar_file:
         if tar_entry.isfile():
           entry = tar_file.extractfile(tar_entry)
-          raw_descriptors.append(entry.read())
+          raw_descriptors.append(entry.read().strip())
           entry.close()
     
     TAR_DESCRIPTORS = raw_descriptors
@@ -149,7 +149,7 @@ class TestDescriptorReader(unittest.TestCase):
     
     descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, "example_descriptor")
     with open(descriptor_path) as descriptor_file:
-      descriptor_entries.append(descriptor_file.read())
+      descriptor_entries.append(descriptor_file.read().strip())
     
     # running this test multiple times to flush out concurrency issues
     for i in xrange(15):





More information about the tor-commits mailing list