commit 51141d37b9a26a8a05cfa09cf85f3939ab79011a
Author: Damian Johnson <atagar(a)torproject.org>
Date: Thu Dec 6 09:02:35 2012 -0800
Skip pointless extra read of network status documents
When parsing a network status document we read to the end of the router status
entries solely to determine where the end of the file is. This is a waste of
time and memory. Instead, check for the end of the section as we go along.
---
stem/control.py | 2 +
stem/descriptor/__init__.py | 14 +++++++++--
stem/descriptor/networkstatus.py | 4 +-
stem/descriptor/router_status_entry.py | 36 ++++++++++++++++++++++++-------
4 files changed, 43 insertions(+), 13 deletions(-)
diff --git a/stem/control.py b/stem/control.py
index 6b19c74..0803ad4 100644
--- a/stem/control.py
+++ b/stem/control.py
@@ -36,7 +36,9 @@ providing its own for interacting at a higher level.
|- map_address - maps one address to another such that connections to the original are replaced with the other
|- get_version - convenience method to get tor version
|- get_server_descriptor - querying the server descriptor for a relay
+ |- get_server_descriptors - provides all presently available server descriptors
|- get_network_status - querying the router status entry for a relay
+ |- get_network_statuses - provides all presently available router status entries
|- authenticate - convenience method to authenticate the controller
+- protocolinfo - convenience method to get the protocol info
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 4c5841e..7291995 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -175,7 +175,7 @@ class Descriptor(object):
def __str__(self):
return self._raw_contents
-def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None):
+def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False):
"""
Reads from the descriptor file until we get to one of the given keywords or reach the
end of the file.
@@ -187,11 +187,14 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
given keywords
:param bool skip: skips buffering content, returning None
:param int end_position: end if we reach this point in the file
+ :param bool include_ending_keyword: provides the keyword we broke on if **True**
- :returns: **list** with the lines until we find one of the keywords
+ :returns: **list** with the lines until we find one of the keywords, or a two value tuple of (lines, ending keyword) if include_ending_keyword is **True**
"""
content = None if skip else []
+ ending_keyword = None
+
if type(keywords) == str: keywords = (keywords,)
if ignore_first:
@@ -218,6 +221,8 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
line_keyword = line_match.groups()[0]
if line_keyword in keywords:
+ ending_keyword = line_keyword
+
if not inclusive:
descriptor_file.seek(last_position)
elif content is not None:
@@ -227,7 +232,10 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
elif content is not None:
content.append(line)
- return content
+ if include_ending_keyword:
+ return (content, ending_keyword)
+ else:
+ return content
def _get_pseudo_pgp_block(remaining_contents):
"""
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index ca6aca2..efe40e1 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -296,7 +296,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
validate,
entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2,
entry_keyword = ROUTERS_START,
- section_end_keywords = V2_FOOTER_START,
+ section_end_keywords = (V2_FOOTER_START,),
extra_args = (self,),
))
@@ -483,7 +483,7 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
validate,
entry_class = router_type,
entry_keyword = ROUTERS_START,
- section_end_keywords = FOOTER_START,
+ section_end_keywords = (FOOTER_START,),
extra_args = (self,),
))
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index 61f03e8..2d02330 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -50,20 +50,40 @@ def parse_file(document_file, validate, entry_class, entry_keyword = "r", start_
* **IOError** if the file can't be read
"""
- if start_position is None:
+ if start_position:
+ document_file.seek(start_position)
+ else:
start_position = document_file.tell()
- if end_position is None:
- if section_end_keywords:
- stem.descriptor._read_until_keywords(section_end_keywords, document_file, skip = True)
- end_position = document_file.tell()
+ # check if we're starting at the end of the section (i.e., there are no entries to read)
+ if section_end_keywords:
+ first_keyword = None
+ line_match = stem.descriptor.KEYWORD_LINE.match(document_file.readline())
+
+ if line_match:
+ first_keyword = line_match.groups()[0]
+
+ document_file.seek(start_position)
+
+ if first_keyword in section_end_keywords:
+ return
- document_file.seek(start_position)
- while not end_position or document_file.tell() < end_position:
- desc_content = "".join(stem.descriptor._read_until_keywords(entry_keyword, document_file, ignore_first = True, end_position = end_position))
+ while end_position is None or document_file.tell() < end_position:
+ desc_lines, ending_keyword = stem.descriptor._read_until_keywords(
+ (entry_keyword,) + section_end_keywords,
+ document_file,
+ ignore_first = True,
+ end_position = end_position,
+ include_ending_keyword = True
+ )
+
+ desc_content = "".join(desc_lines)
if desc_content:
yield entry_class(desc_content, validate, *extra_args)
+
+ # check if we stopped at the end of the section
+ if ending_keyword in section_end_keywords: break
else:
break