[tor-commits] [stem/master] Using a constant for line regex we match against

atagar at torproject.org atagar at torproject.org
Sat Aug 16 22:43:45 UTC 2014


commit b427ad8fbea4adbe4ba92324e80425981e54fbdb
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Aug 16 15:37:14 2014 -0700

    Using a constant for line regex we match against
    
    Just a minor tweak so the regex we use is defined alongside the other
    descriptor content regexes.
---
 docs/change_log.rst         |    4 ++++
 stem/descriptor/__init__.py |    6 ++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index 6789212..b52a377 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -40,6 +40,10 @@ Unreleased
 The following are only available within Stem's `git repository
 <download.html>`_.
 
+ * **Descriptors**
+
+  * Improved speed for parsing consensus documents by around 30% (:trac:`12859`)
+
  * **Utilities**
 
   * Added support for directories to :func:`stem.util.conf.Config.load`.
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 411ad3d..c14d984 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -68,6 +68,7 @@ except ImportError:
 KEYWORD_CHAR = 'a-zA-Z0-9-'
 WHITESPACE = ' \t'
 KEYWORD_LINE = re.compile('^([%s]+)(?:[%s]+(.*))?$' % (KEYWORD_CHAR, WHITESPACE))
+SPECIFIC_KEYWORD_LINE = '^(%%s)(?:[%s]+(.*))?$' % WHITESPACE
 PGP_BLOCK_START = re.compile('^-----BEGIN ([%s%s]+)-----$' % (KEYWORD_CHAR, WHITESPACE))
 PGP_BLOCK_END = '-----END %s-----'
 
@@ -425,6 +426,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
   else:
     content = []
     content_append = content.append
+
   ending_keyword = None
 
   if isinstance(keywords, (bytes, unicode)):
@@ -436,7 +438,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
     if first_line is not None:
       content_append(first_line)
 
-  match_re = re.compile(r'^(%s)(?:[ \t].*|)$' % "|".join(keywords))
+  keyword_match = re.compile(SPECIFIC_KEYWORD_LINE % '|'.join(keywords))
 
   while True:
     last_position = descriptor_file.tell()
@@ -449,7 +451,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
     if not line:
       break  # EOF
 
-    line_match = match_re.match(stem.util.str_tools._to_unicode(line))
+    line_match = keyword_match.match(stem.util.str_tools._to_unicode(line))
 
     if line_match:
       ending_keyword = line_match.groups()[0]





More information about the tor-commits mailing list