commit eb0e424ed9459188b70f33ff401e23e9fd89138b Author: Ravi Chandra Padmala neenaoffline@gmail.com Date: Wed Aug 8 12:39:55 2012 +0530
Implement lazy router descriptor reading --- stem/descriptor/__init__.py | 60 +++++++++++++++++++++++++++++++++++++- stem/descriptor/networkstatus.py | 36 ++++++++++++++++------- 2 files changed, 84 insertions(+), 12 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index b1f3ab6..168b357 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -148,7 +148,31 @@ class Descriptor(object): def __str__(self): return self._raw_contents
-def _read_until_keywords(keywords, descriptor_file, inclusive = False): +def _peek_keyword(descriptor_file): + """ + Returns the keyword at the current offset of descriptor_file. Respects the + "opt" keyword and returns the next keyword instead. + + :param file descriptor_file: file with the descriptor content + + :returns: keyword at the current offset of descriptor_file + """ + + last_position = descriptor_file.tell() + line = descriptor_file.readline() + if not line: return None + + if " " in line: + keyword = line.split(" ", 1)[0] + if keyword == "opt": + keyword = line.split(" ", 2)[1] + else: keyword = line.strip() + + descriptor_file.seek(last_position) + + return keyword + +def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False): """ Reads from the descriptor file until we get to one of the given keywords or reach the end of the file. @@ -156,6 +180,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False): :param str,list keywords: keyword(s) we want to read until :param file descriptor_file: file with the descriptor content :param bool inclusive: includes the line with the keyword if True + :param bool ignore_first: doesn't check if the first line read has one of the given keywords
:returns: list with the lines until we find one of the keywords """ @@ -163,6 +188,10 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False): content = [] if type(keywords) == str: keywords = (keywords,)
+ if ignore_first: + content.append(descriptor_file.readline()) + if content == [None]: return [] + while True: last_position = descriptor_file.tell() line = descriptor_file.readline() @@ -181,6 +210,35 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False):
return content
+def _skip_until_keywords(keywords, descriptor_file, inclusive = False): + """ + Reads and discards lines of data from the descriptor file until we get to one + of the given keywords or reach the end of the file. + + :param str,list keywords: keyword(s) we want to skip until + :param file descriptor_file: file with the descriptor content + :param bool inclusive: includes the line with the keyword if True + + :returns: descriptor_file with the new offset + """ + + if type(keywords) == str: keywords = (keywords,) + + while True: + last_position = descriptor_file.tell() + line = descriptor_file.readline() + if not line: break # EOF + + if " " in line: line_keyword = line.split(" ", 1)[0] + else: line_keyword = line.strip() + + if line_keyword in keywords: + if not inclusive: descriptor_file.seek(last_position) + + break + + return descriptor_file + def _get_pseudo_pgp_block(remaining_contents): """ Checks if given contents begins with a pseudo-Open-PGP-style block and, if diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py index e4cfda1..a51fcbd 100644 --- a/stem/descriptor/networkstatus.py +++ b/stem/descriptor/networkstatus.py @@ -40,14 +40,19 @@ The documents can be obtained from any of the following sources...
import re import datetime +from StringIO import StringIO
import stem.descriptor import stem.version import stem.exit_policy
+from stem.descriptor import _read_until_keywords, _skip_until_keywords, _peek_keyword + _bandwidth_weights_regex = re.compile(" ".join(["W%s=\d+" % weight for weight in ["bd", "be", "bg", "bm", "db", "eb", "ed", "ee", "eg", "em", "gb", "gd", "gg", "gm", "mb", "md", "me", "mg", "mm"]]))
+_router_desc_end_kws = ["r", "bandwidth-weights", "directory-footer", "directory-signature"] + def parse_file(document_file, validate = True): """ Iterates over the router descriptors in a network status document. @@ -62,13 +67,30 @@ def parse_file(document_file, validate = True): * IOError if the file can't be read """
- return NetworkStatusDocument(document_file.read(), validate).router_descriptors + # parse until "r" + document_data = "".join(_read_until_keywords("r", document_file)) + # store offset + r_offset = document_file.tell() + # skip until end of router descriptors + _skip_until_keywords(["bandwidth-weights", "directory-footer", "directory-signature"], document_file) + # parse until end + document_data = document_data + document_file.read() + document = NetworkStatusDocument(document_data, validate) + document_file.seek(r_offset) + document.router_descriptors = _router_desc_generator(document_file, document.vote_status == "vote", validate) + return document.router_descriptors
def _strptime(string, validate = True, optional = False):
    """
    Parses a "YYYY-MM-DD HH:MM:SS" timestamp into a datetime.

    :param str string: timestamp to be parsed
    :param bool validate: re-raises parsing failures if True
    :param bool optional: permits a malformed timestamp to be dropped, but only
      when validate is False

    :returns: datetime for the timestamp, or None if it is malformed and the
      failure was suppressed (validate is False and optional is True)

    :raises: ValueError if the timestamp is malformed and either validate is
      True or optional is False
    """

    try:
        return datetime.datetime.strptime(string, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        if validate or not optional:
            # bare raise keeps the original traceback intact
            raise

        return None


def _router_desc_generator(document_file, vote, validate):
    """
    Lazily yields the router descriptors found at the current offset of the
    given file. Each descriptor is read only when the generator is advanced,
    and iteration stops at the first line whose keyword isn't "r".

    :param file document_file: file positioned at the first router descriptor
    :param bool vote: passed through to each RouterDescriptor
    :param bool validate: passed through to each RouterDescriptor

    :returns: generator of RouterDescriptor instances
    """

    while _peek_keyword(document_file) == "r":
        # The leading "r" line belongs to this descriptor, so it is skipped when
        # looking for the keyword that ends the descriptor.
        desc_lines = _read_until_keywords(
            _router_desc_end_kws,
            document_file,
            inclusive = False,
            ignore_first = True,
        )

        yield RouterDescriptor("".join(desc_lines), vote, validate)
class NetworkStatusDocument(stem.descriptor.Descriptor): """ @@ -193,21 +215,13 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
# authority section while doc_parser.line.startswith("dir-source "): - dirauth_data = doc_parser.read_until(["dir-source", "r"]) + dirauth_data = doc_parser.read_until(["dir-source", "r", "directory-footer", "directory-signature", "bandwidth-weights"]) self.directory_authorities.append(DirectoryAuthority(dirauth_data, vote, validate))
- def _router_desc_generator(raw_content, vote, validate): - parser = stem.descriptor.DescriptorParser(raw_content, validate) - while parser.line != None: - descriptor = parser.read_until("r") - yield self._generate_router(descriptor, vote, validate) - # router descriptors if doc_parser.peek_keyword() == "r": router_descriptors_data = doc_parser.read_until(["bandwidth-weights", "directory-footer", "directory-signature"]) - self.router_descriptors = _router_desc_generator(router_descriptors_data, vote, validate) - elif validate: - raise ValueError("No router descriptors found") + self.router_descriptors = _router_desc_generator(StringIO(router_descriptors_data), vote, validate)
# footer section if self.consensus_method > 9 or vote and filter(lambda x: x >= 9, self.consensus_methods):
tor-commits@lists.torproject.org