commit 964eea9fb1ecb282cfbabafc29da45f88ed6ee3b Author: Nick Mathewson nickm@torproject.org Date: Wed Aug 13 20:46:30 2014 -0400
Optimize _read_until_keywords a little
- use an unconditional function call in the inner loop rather than checking whether content is set all the time. This saves a few percent.
- use a compiled regex to determine whether we have found the target keywords. This is a fairly big win as far as I can tell. Perhaps this regex can be tuned further? --- stem/descriptor/__init__.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 9fe7235..411ad3d 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -419,7 +419,12 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi **True** """
- content = None if skip else [] + if skip: + content = None + content_append = lambda x: None + else: + content = [] + content_append = content.append ending_keyword = None
if isinstance(keywords, (bytes, unicode)): @@ -428,8 +433,10 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi if ignore_first: first_line = descriptor_file.readline()
- if content is not None and first_line is not None: - content.append(first_line) + if first_line is not None: + content_append(first_line) + + match_re = re.compile(r'^(%s)(?:[ \t].*|)$' % "|".join(keywords))
while True: last_position = descriptor_file.tell() @@ -442,25 +449,19 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi if not line: break # EOF
- line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(line)) + line_match = match_re.match(stem.util.str_tools._to_unicode(line))
- if not line_match: - # no spaces or tabs in the line - line_keyword = stem.util.str_tools._to_unicode(line.strip()) - else: - line_keyword = line_match.groups()[0] - - if line_keyword in keywords: - ending_keyword = line_keyword + if line_match: + ending_keyword = line_match.groups()[0]
if not inclusive: descriptor_file.seek(last_position) - elif content is not None: - content.append(line) + else: + content_append(line)
break - elif content is not None: - content.append(line) + else: + content_append(line)
if include_ending_keyword: return (content, ending_keyword)
tor-commits@lists.torproject.org