commit 01c516df08c192a159ed84a1404ce7771edbf037 Author: Damian Johnson atagar@torproject.org Date: Sat May 31 20:34:04 2014 -0700
Skipping @type annotations in descriptor parse_file() functions
At a high level, stem parses descriptors by doing the following:
1. Users call stem.descriptor.parse_file().
2. The parse_file() function uses the @type annotation to guess the descriptor type.
3. It delegates to the parse_file() function of the relevant descriptor class. Every descriptor class has its own parse_file() function.
Karsten is starting to serve descriptor files that consist of multiple descriptors concatenated together. Stem already largely handles this, since I designed our parsers to read Tor's cached descriptors (which are concatenated descriptors as well).
The only gotcha is that @type annotations technically aren't valid descriptor content, so at step #3 the descriptor classes reject them as invalid. This commit works around that by simply skipping @type annotations at the beginning of files. --- stem/descriptor/extrainfo_descriptor.py | 3 +++ stem/descriptor/microdescriptor.py | 3 +++ stem/descriptor/networkstatus.py | 3 +++ stem/descriptor/server_descriptor.py | 3 +++ 4 files changed, 12 insertions(+)
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index 3ccdd27..000b4ec 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -175,6 +175,9 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs): extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if extrainfo_content: + if extrainfo_content[0].startswith('@type'): + extrainfo_content = extrainfo_content[1:] + if is_bridge: yield BridgeExtraInfoDescriptor(bytes.join(b'', extrainfo_content), validate, **kwargs) else: diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py index f308da4..33ad4e0 100644 --- a/stem/descriptor/microdescriptor.py +++ b/stem/descriptor/microdescriptor.py @@ -138,6 +138,9 @@ def _parse_file(descriptor_file, validate = True, **kwargs): descriptor_lines.append(line)
if descriptor_lines: + if descriptor_lines[0].startswith('@type'): + descriptor_lines = descriptor_lines[1:] + # strip newlines from annotations annotations = map(bytes.strip, annotations)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py index 588d4ac..fbe96fb 100644 --- a/stem/descriptor/networkstatus.py +++ b/stem/descriptor/networkstatus.py @@ -198,6 +198,9 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
header = _read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
+ if header and header[0].startswith('@type'): + header = header[1:] + routers_start = document_file.tell() _read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True) routers_end = document_file.tell() diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py index 58598b2..97781da 100644 --- a/stem/descriptor/server_descriptor.py +++ b/stem/descriptor/server_descriptor.py @@ -143,6 +143,9 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs): descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if descriptor_content: + if descriptor_content[0].startswith('@type'): + descriptor_content = descriptor_content[1:] + # strip newlines from annotations annotations = map(bytes.strip, annotations)