commit 1a50ce30e95dd11dbc80167f4c4df2df0b05f55b Author: Damian Johnson atagar@torproject.org Date: Tue Jan 24 10:44:01 2017 -0800
Don't clone immutables when parsing descriptors
Avoiding an unnecessary copy call on types which are immutable. Seems even for empty lists and dictionaries calling the constructor rather than copy() is a tad faster.
On my wee little netbook this speeds up reading microdescriptors by ~5% when there's validation and speeds our integ tests a bit...
Before descriptor.microdescriptor... success (15.38s) descriptor.networkstatus... success (23.25s)
After descriptor.microdescriptor... success (13.96s) descriptor.networkstatus... success (22.77s)
This should help all descriptor reading, though when validation is disabled this boon only applies when accessing attributes. --- docs/change_log.rst | 1 + stem/descriptor/__init__.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst index e16a948..477db2e 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -45,6 +45,7 @@ The following are only available within Stem's `git repository
* **Descriptors**
+ * Sped descriptor reading by ~5% by not cloning immutable fields * Support for protocol descriptor fields (:spec:`eb4fb3c`) * Shared randomness properties weren't being read in votes (:trac:`21102`)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index d7b1aea..67ea374 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -78,6 +78,7 @@ KEYWORD_LINE = re.compile('^([%s]+)(?:[%s]+(.*))?$' % (KEYWORD_CHAR, WHITESPACE) SPECIFIC_KEYWORD_LINE = '^(%%s)(?:[%s]+(.*))?$' % WHITESPACE PGP_BLOCK_START = re.compile('^-----BEGIN ([%s%s]+)-----$' % (KEYWORD_CHAR, WHITESPACE)) PGP_BLOCK_END = '-----END %s-----' +EMPTY_COLLECTION = ([], {}, set())
DocumentHandler = stem.util.enum.UppercaseEnum( 'ENTRIES', @@ -519,7 +520,16 @@ class Descriptor(object):
for attr in self.ATTRIBUTES: if not hasattr(self, attr): - setattr(self, attr, copy.copy(self.ATTRIBUTES[attr][0])) + value = self.ATTRIBUTES[attr][0] + + if value is None or isinstance(value, (bool, stem.exit_policy.ExitPolicy)): + pass # immutable + elif value in EMPTY_COLLECTION: + value = type(value)() # collection construction tad faster than copy + else: + value = copy.copy(value) + + setattr(self, attr, value)
for keyword, values in list(entries.items()): try:
tor-commits@lists.torproject.org