commit b427ad8fbea4adbe4ba92324e80425981e54fbdb
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sat Aug 16 15:37:14 2014 -0700
Using a constant for the line regex we match against
Just a minor tweak so the regex we use is defined alongside the other regex
constants for descriptor content.
---
docs/change_log.rst | 4 ++++
stem/descriptor/__init__.py | 6 ++++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst
index 6789212..b52a377 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -40,6 +40,10 @@ Unreleased
The following are only available within Stem's `git repository
<download.html>`_.
+ * **Descriptors**
+
+ * Improved speed for parsing consensus documents by around 30% (:trac:`12859`)
+
* **Utilities**
* Added support for directories to :func:`stem.util.conf.Config.load`.
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 411ad3d..c14d984 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -68,6 +68,7 @@ except ImportError:
KEYWORD_CHAR = 'a-zA-Z0-9-'
WHITESPACE = ' \t'
KEYWORD_LINE = re.compile('^([%s]+)(?:[%s]+(.*))?$' % (KEYWORD_CHAR, WHITESPACE))
+SPECIFIC_KEYWORD_LINE = '^(%%s)(?:[%s]+(.*))?$' % WHITESPACE
PGP_BLOCK_START = re.compile('^-----BEGIN ([%s%s]+)-----$' % (KEYWORD_CHAR, WHITESPACE))
PGP_BLOCK_END = '-----END %s-----'
@@ -425,6 +426,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
else:
content = []
content_append = content.append
+
ending_keyword = None
if isinstance(keywords, (bytes, unicode)):
@@ -436,7 +438,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
if first_line is not None:
content_append(first_line)
- match_re = re.compile(r'^(%s)(?:[ \t].*|)$' % "|".join(keywords))
+ keyword_match = re.compile(SPECIFIC_KEYWORD_LINE % '|'.join(keywords))
while True:
last_position = descriptor_file.tell()
@@ -449,7 +451,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
if not line:
break # EOF
- line_match = match_re.match(stem.util.str_tools._to_unicode(line))
+ line_match = keyword_match.match(stem.util.str_tools._to_unicode(line))
if line_match:
ending_keyword = line_match.groups()[0]