[tor-commits] [stem/master] Skipping @type annotations in descriptor parse_file() functions

Sun Jun 1 03:47:46 UTC 2014

commit 01c516df08c192a159ed84a1404ce7771edbf037
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat May 31 20:34:04 2014 -0700

    Skipping @type annotations in descriptor parse_file() functions
    
    At a high level stem parses descriptors by doing the following...
    
      1. Users call stem.descriptor.parse_file().
    
      2. The parse_file() function uses the @type annotation to guess the
         descriptor type.
    
      3. It delegates to the parse_file() function of the relevant descriptor
         class. Every descriptor class has its own parse_file() function.
    
    Karsten is starting to serve descriptor files that consist of multiple
    descriptors concatenated together. Stem already handles this for the most
    part, since I designed our parsers to read Tor's cached descriptor files
    (which are concatenated descriptors as well).
    
    The only gotcha is that @type annotations technically aren't valid descriptor
    content, so at step #3 the descriptor classes balk and report the content as
    invalid. This commit works around that by simply skipping @type annotations
    at the beginning of files.
---
 stem/descriptor/extrainfo_descriptor.py |    3 +++
 stem/descriptor/microdescriptor.py      |    3 +++
 stem/descriptor/networkstatus.py        |    3 +++
 stem/descriptor/server_descriptor.py    |    3 +++
 4 files changed, 12 insertions(+)

diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 3ccdd27..000b4ec 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -175,6 +175,9 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
     extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
 
     if extrainfo_content:
+      if extrainfo_content[0].startswith('@type'):
+        extrainfo_content = extrainfo_content[1:]
+
       if is_bridge:
         yield BridgeExtraInfoDescriptor(bytes.join(b'', extrainfo_content), validate, **kwargs)
       else:
diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index f308da4..33ad4e0 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -138,6 +138,9 @@ def _parse_file(descriptor_file, validate = True, **kwargs):
         descriptor_lines.append(line)
 
     if descriptor_lines:
+      if descriptor_lines[0].startswith('@type'):
+        descriptor_lines = descriptor_lines[1:]
+
       # strip newlines from annotations
       annotations = map(bytes.strip, annotations)
 
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 588d4ac..fbe96fb 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -198,6 +198,9 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
 
   header = _read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
 
+  if header and header[0].startswith('@type'):
+    header = header[1:]
+
   routers_start = document_file.tell()
   _read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
   routers_end = document_file.tell()
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 58598b2..97781da 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -143,6 +143,9 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
     descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
 
     if descriptor_content:
+      if descriptor_content[0].startswith('@type'):
+        descriptor_content = descriptor_content[1:]
+
       # strip newlines from annotations
       annotations = map(bytes.strip, annotations)