commit 51141d37b9a26a8a05cfa09cf85f3939ab79011a Author: Damian Johnson atagar@torproject.org Date: Thu Dec 6 09:02:35 2012 -0800
Skip pointless extra read of network status documents
When parsing a network status document we read to the end of the router status entries solely to determine where the end of the file is. This is a waste of time and memory. Instead, check for the end of the section as we go along. --- stem/control.py | 2 + stem/descriptor/__init__.py | 14 +++++++++-- stem/descriptor/networkstatus.py | 4 +- stem/descriptor/router_status_entry.py | 36 ++++++++++++++++++++++++------- 4 files changed, 43 insertions(+), 13 deletions(-)
diff --git a/stem/control.py b/stem/control.py index 6b19c74..0803ad4 100644 --- a/stem/control.py +++ b/stem/control.py @@ -36,7 +36,9 @@ providing its own for interacting at a higher level. |- map_address - maps one address to another such that connections to the original are replaced with the other |- get_version - convenience method to get tor version |- get_server_descriptor - querying the server descriptor for a relay + |- get_server_descriptors - provides all presently available server descriptors |- get_network_status - querying the router status entry for a relay + |- get_network_statuses - provides all presently available router status entries |- authenticate - convenience method to authenticate the controller +- protocolinfo - convenience method to get the protocol info
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 4c5841e..7291995 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -175,7 +175,7 @@ class Descriptor(object): def __str__(self): return self._raw_contents
-def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None): +def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False): """ Reads from the descriptor file until we get to one of the given keywords or reach the end of the file. @@ -187,11 +187,14 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi given keywords :param bool skip: skips buffering content, returning None :param int end_position: end if we reach this point in the file + :param bool include_ending_keyword: provides the keyword we broke on if **True**
- :returns: **list** with the lines until we find one of the keywords + :returns: **list** with the lines until we find one of the keywords, this is a two value tuple with the ending keyword if include_ending_keyword is **True** """
content = None if skip else [] + ending_keyword = None + if type(keywords) == str: keywords = (keywords,)
if ignore_first: @@ -218,6 +221,8 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi line_keyword = line_match.groups()[0]
if line_keyword in keywords: + ending_keyword = line_keyword + if not inclusive: descriptor_file.seek(last_position) elif content is not None: @@ -227,7 +232,10 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi elif content is not None: content.append(line)
- return content + if include_ending_keyword: + return (content, ending_keyword) + else: + return content
def _get_pseudo_pgp_block(remaining_contents): """ diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py index ca6aca2..efe40e1 100644 --- a/stem/descriptor/networkstatus.py +++ b/stem/descriptor/networkstatus.py @@ -296,7 +296,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument): validate, entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2, entry_keyword = ROUTERS_START, - section_end_keywords = V2_FOOTER_START, + section_end_keywords = (V2_FOOTER_START,), extra_args = (self,), ))
@@ -483,7 +483,7 @@ class NetworkStatusDocumentV3(NetworkStatusDocument): validate, entry_class = router_type, entry_keyword = ROUTERS_START, - section_end_keywords = FOOTER_START, + section_end_keywords = (FOOTER_START,), extra_args = (self,), ))
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py index 61f03e8..2d02330 100644 --- a/stem/descriptor/router_status_entry.py +++ b/stem/descriptor/router_status_entry.py @@ -50,20 +50,40 @@ def parse_file(document_file, validate, entry_class, entry_keyword = "r", start_ * **IOError** if the file can't be read """
- if start_position is None: + if start_position: + document_file.seek(start_position) + else: start_position = document_file.tell()
- if end_position is None: - if section_end_keywords: - stem.descriptor._read_until_keywords(section_end_keywords, document_file, skip = True) - end_position = document_file.tell() + # check if we're starting at the end of the section (ie, there's no entries to read) + if section_end_keywords: + first_keyword = None + line_match = stem.descriptor.KEYWORD_LINE.match(document_file.readline()) + + if line_match: + first_keyword = line_match.groups()[0] + + document_file.seek(start_position) + + if first_keyword in section_end_keywords: + return
- document_file.seek(start_position) - while not end_position or document_file.tell() < end_position: - desc_content = "".join(stem.descriptor._read_until_keywords(entry_keyword, document_file, ignore_first = True, end_position = end_position)) + while end_position is None or document_file.tell() < end_position: + desc_lines, ending_keyword = stem.descriptor._read_until_keywords( + (entry_keyword,) + section_end_keywords, + document_file, + ignore_first = True, + end_position = end_position, + include_ending_keyword = True + ) + + desc_content = "".join(desc_lines)
if desc_content: yield entry_class(desc_content, validate, *extra_args) + + # check if we stopped at the end of the section + if ending_keyword in section_end_keywords: break else: break