commit 01c516df08c192a159ed84a1404ce7771edbf037
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sat May 31 20:34:04 2014 -0700
Skipping @type annotations in descriptor parse_file() functions
At a high level, stem parses descriptors by doing the following:
1. Users call stem.descriptor.parse_file().
2. The parse_file() function uses the @type annotation to guess the
descriptor type.
3. It delegates to the parse_file() function of the relevant descriptor
class. Every descriptor class has its own parse_file() function.
Karsten is starting to serve descriptor files that consist of multiple
descriptors concatenated together. Stem already largely handles this, since I
designed our parsers to read Tor's cached descriptors (which are also
concatenated descriptors).
The only gotcha is that @type annotations technically aren't valid descriptor
content, so at step #3 the descriptor classes balk and say so. This commit works
around that by simply skipping @type annotations at the beginning of files.
---
stem/descriptor/extrainfo_descriptor.py | 3 +++
stem/descriptor/microdescriptor.py | 3 +++
stem/descriptor/networkstatus.py | 3 +++
stem/descriptor/server_descriptor.py | 3 +++
4 files changed, 12 insertions(+)
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 3ccdd27..000b4ec 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -175,6 +175,9 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if extrainfo_content:
+ if extrainfo_content[0].startswith('@type'):
+ extrainfo_content = extrainfo_content[1:]
+
if is_bridge:
yield BridgeExtraInfoDescriptor(bytes.join(b'', extrainfo_content), validate, **kwargs)
else:
diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index f308da4..33ad4e0 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -138,6 +138,9 @@ def _parse_file(descriptor_file, validate = True, **kwargs):
descriptor_lines.append(line)
if descriptor_lines:
+ if descriptor_lines[0].startswith('@type'):
+ descriptor_lines = descriptor_lines[1:]
+
# strip newlines from annotations
annotations = map(bytes.strip, annotations)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 588d4ac..fbe96fb 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -198,6 +198,9 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
header = _read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
+ if header and header[0].startswith('@type'):
+ header = header[1:]
+
routers_start = document_file.tell()
_read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
routers_end = document_file.tell()
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 58598b2..97781da 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -143,6 +143,9 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if descriptor_content:
+ if descriptor_content[0].startswith('@type'):
+ descriptor_content = descriptor_content[1:]
+
# strip newlines from annotations
annotations = map(bytes.strip, annotations)