[stem/master] Server descriptor parsing fixes

commit 8dc2d66b7dde16e0709dc5c5ff1ac0c320397f22 Author: Damian Johnson <atagar@torproject.org> Date: Fri Mar 23 22:05:29 2012 -0700 Server descriptor parsing fixes Resolving the variety of issues that were breaking the integ tests. We still don't have any tests to actually verify the server descriptor parsing, but at least we're now exercising it on some real data... and not dying in a fire! Progress! --- stem/descriptor/__init__.py | 8 +++++--- stem/descriptor/server_descriptor.py | 24 +++++++++++++----------- test/integ/descriptor/reader.py | 4 ++-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 4d44bc0..ad17296 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -43,14 +43,16 @@ def parse_descriptors(path, descriptor_file): if first_line.startswith("router "): # server descriptor - while descriptor_file: - yield stem.descriptor.server_descriptor.parse_server_descriptors_v2(path, descriptor_file) + for desc in stem.descriptor.server_descriptor.parse_server_descriptors_v2(path, descriptor_file): + yield desc return # TODO: implement actual descriptor type recognition and parsing # TODO: add integ test for non-descriptor text content - yield Descriptor(path, descriptor_file.read()) + desc = Descriptor(descriptor_file.read()) + desc._set_path(path) + yield desc class Descriptor: """ diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py index bd68f91..fe6889b 100644 --- a/stem/descriptor/server_descriptor.py +++ b/stem/descriptor/server_descriptor.py @@ -21,7 +21,7 @@ ENTRY_END = "router-signature" KEYWORD_CHAR = "a-zA-Z0-9-" WHITESPACE = " \t" -KEYWORD_LINE = re.compile("^([%s]+)[%s]*([%s]*)$" % (KEYWORD_CHAR, WHITESPACE, KEYWORD_CHAR)) +KEYWORD_LINE = re.compile("^([%s]+)[%s]*(.*)$" % (KEYWORD_CHAR, WHITESPACE)) PGP_BLOCK_START = re.compile("^-----BEGIN ([%s%s]+)-----$" % (KEYWORD_CHAR, WHITESPACE)) PGP_BLOCK_END = "-----END 
%s-----" @@ -74,7 +74,7 @@ def parse_server_descriptors_v2(path, descriptor_file): # end of the signature block # - construct a descriptor and provide it back to the caller - while descriptor_file: + while True: annotations = _read_until_keyword(ENTRY_START, descriptor_file) descriptor_content = _read_until_keyword(ENTRY_END, descriptor_file) @@ -88,9 +88,10 @@ def parse_server_descriptors_v2(path, descriptor_file): # caller. if descriptor_content: - descriptor = ServerDescriptorV2(descriptor_content, annotations = annotations) + descriptor = ServerDescriptorV2("\n".join(descriptor_content), annotations = annotations) descriptor._set_path(path) yield descriptor + else: return # done parsing descriptors def _read_until_keyword(keyword, descriptor_file, inclusive = False): """ @@ -108,12 +109,14 @@ def _read_until_keyword(keyword, descriptor_file, inclusive = False): content = [] - while descriptor_file: + while True: last_position = descriptor_file.tell() line = descriptor_file.readline() - if not line: continue # blank line - elif " " in line: line_keyword = line.split(" ", 1)[0] + if not line: break # EOF + line = line.strip() + + if " " in line: line_keyword = line.split(" ", 1)[0] else: line_keyword = line if line_keyword == keyword: @@ -259,7 +262,7 @@ class ServerDescriptorV2(stem.descriptor.Descriptor): # ignored. This prefix is being removed in... 
# https://trac.torproject.org/projects/tor/ticket/5124 - line = line.lstrip("opt ") + if line.startswith("opt "): line = line[4:] line_match = KEYWORD_LINE.match(line) @@ -278,7 +281,6 @@ class ServerDescriptorV2(stem.descriptor.Descriptor): entries[keyword] = [(value, block_type, block_contents)] # validates restrictions about the entries - if validate: for keyword in REQUIRED_FIELDS: if not keyword in entries: @@ -338,9 +340,9 @@ class ServerDescriptorV2(stem.descriptor.Descriptor): elif not bandwidth_comp[2].isdigit(): raise ValueError("Bandwidth line's observed rate isn't numeric: %s" % bandwidth_comp[2]) - self.average_bandwidth = int(router_comp[0]) - self.burst_bandwidth = int(router_comp[1]) - self.observed_bandwidth = int(router_comp[2]) + self.average_bandwidth = int(bandwidth_comp[0]) + self.burst_bandwidth = int(bandwidth_comp[1]) + self.observed_bandwidth = int(bandwidth_comp[2]) elif keyword == "platform": # "platform" string diff --git a/test/integ/descriptor/reader.py b/test/integ/descriptor/reader.py index cdc33c2..7f2a425 100644 --- a/test/integ/descriptor/reader.py +++ b/test/integ/descriptor/reader.py @@ -51,7 +51,7 @@ def _get_raw_tar_descriptors(): for tar_entry in tar_file: if tar_entry.isfile(): entry = tar_file.extractfile(tar_entry) - raw_descriptors.append(entry.read()) + raw_descriptors.append(entry.read().strip()) entry.close() TAR_DESCRIPTORS = raw_descriptors @@ -149,7 +149,7 @@ class TestDescriptorReader(unittest.TestCase): descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, "example_descriptor") with open(descriptor_path) as descriptor_file: - descriptor_entries.append(descriptor_file.read()) + descriptor_entries.append(descriptor_file.read().strip()) # running this test multiple times to flush out concurrency issues for i in xrange(15):
participants (1)
-
atagar@torproject.org