commit 3eecbe1f5ce468769d5d884172eeca677e2f1b30
Author: Damian Johnson <atagar(a)torproject.org>
Date: Wed Jan 25 10:58:50 2017 -0800
Defer defaulting to speed descriptor parsing
Defaulting descriptor attributes when they're accessed rather than doing this
right away. This speeds reading descriptors with validation by ~25%...
Before
descriptor.microdescriptor... success (15.38s)
descriptor.networkstatus... success (23.25s)
After
descriptor.microdescriptor... success (11.67s)
descriptor.networkstatus... success (19.80s)
---
docs/change_log.rst | 2 +-
stem/descriptor/__init__.py | 48 ++++++++++++++++++++++-----------------------
2 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst
index 477db2e..1111afd 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -45,7 +45,7 @@ The following are only available within Stem's `git repository
* **Descriptors**
- * Sped descriptor reading by ~5% by not cloning immutable fields
+ * Sped descriptor reading by ~25% by deferring defaulting when validating
* Support for protocol descriptor fields (:spec:`eb4fb3c`)
* Shared randomness properties weren't being read in votes (:trac:`21102`)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 67ea374..1262f6a 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -437,6 +437,15 @@ def _parse_key_block(keyword, attribute, expected_block_type, value_attribute =
return _parse
+def _copy(default):
+ if default is None or isinstance(default, (bool, stem.exit_policy.ExitPolicy)):
+ return default # immutable
+ elif default in EMPTY_COLLECTION:
+ return type(default)() # collection construction tad faster than copy
+ else:
+ return copy.copy(default)
+
+
class Descriptor(object):
"""
Common parent for all types of descriptors.
@@ -516,21 +525,6 @@ class Descriptor(object):
if parser_for_line is None:
parser_for_line = self.PARSER_FOR_LINE
- # set defaults
-
- for attr in self.ATTRIBUTES:
- if not hasattr(self, attr):
- value = self.ATTRIBUTES[attr][0]
-
- if value is None or isinstance(value, (bool, stem.exit_policy.ExitPolicy)):
- pass # immutable
- elif value in EMPTY_COLLECTION:
- value = type(value)() # collection construction tad faster than copy
- else:
- value = copy.copy(value)
-
- setattr(self, attr, value)
-
for keyword, values in list(entries.items()):
try:
if keyword in parser_for_line:
@@ -654,19 +648,25 @@ class Descriptor(object):
if name in ('_lazy_loading', 'ATTRIBUTES'):
return super(Descriptor, self).__getattribute__(name)
- # If attribute isn't already present we might be lazy loading it...
+ # If an attribute we should have isn't present it means either...
+ #
+ # a. we still need to lazy load this
+ # b. we read the whole descriptor but it wasn't present, so needs the default
- if self._lazy_loading and name in self.ATTRIBUTES:
+ if name in self.ATTRIBUTES and not hasattr(self, name):
default, parsing_function = self.ATTRIBUTES[name]
- try:
- parsing_function(self, self._entries)
- except (ValueError, KeyError):
+ if self._lazy_loading:
try:
- # despite having a validation failure check to see if we set something
- return super(Descriptor, self).__getattribute__(name)
- except AttributeError:
- setattr(self, name, copy.copy(default))
+ parsing_function(self, self._entries)
+ except (ValueError, KeyError):
+ try:
+ # despite having a validation failure check to see if we set something
+ return super(Descriptor, self).__getattribute__(name)
+ except AttributeError:
+ setattr(self, name, _copy(default))
+ else:
+ setattr(self, name, _copy(default))
return super(Descriptor, self).__getattribute__(name)