commit 783061dfd7e2f53fdbbc7b21757247b61a0287cd Author: Damian Johnson atagar@torproject.org Date: Wed Feb 1 10:41:59 2017 -0800
Avoid excessive recursion when reading descriptors
Interesting! Our hasattr() calls when getting descriptor attributes recurse. Not infinintely (not sure why) but several dozen times, having a huge hit on our performance.
... weird.
Also making a few tweaks to speed up our extrainfo tests. We don't need to be quite so exhaustive with the time interval attributes. It's sufficient to test all attributes once, then check validation for a single attribute to avoid an n^2 assertion proliferation.
Overall on my netbook this drops the extrainfo unit test runtime from 2.9s to 1.3s (55.2% faster). --- stem/descriptor/__init__.py | 21 ++++++-- test/unit/descriptor/extrainfo_descriptor.py | 81 +++++++++++++--------------- 2 files changed, 54 insertions(+), 48 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index c16106d..8c31b86 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -642,22 +642,33 @@ class Descriptor(object): return stem.util.str_tools._to_unicode(digest_hash.hexdigest().upper())
def __getattr__(self, name): + # We can't use standard hasattr() since it calls this function, recursing. + # Doing so works since it stops recursing after several dozen iterations + # (not sure why), but horrible in terms of performance. + + def has_attr(): + try: + super(Descriptor, self).__getattribute__(name) + return True + except: + return False + # If an attribute we should have isn't present it means either... # # a. we still need to lazy load this # b. we read the whole descriptor but it wasn't present, so needs the default
- if name in self.ATTRIBUTES and not hasattr(self, name): + if name in self.ATTRIBUTES and not has_attr(): default, parsing_function = self.ATTRIBUTES[name]
if self._lazy_loading: try: parsing_function(self, self._entries) except (ValueError, KeyError): - try: - # despite having a validation failure check to see if we set something - return super(Descriptor, self).__getattribute__(name) - except AttributeError: + # Despite having a validation failure we might've set something. If + # not then set the default. + + if not has_attr(): setattr(self, name, _copy(default)) else: setattr(self, name, _copy(default)) diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py index 8dc4130..79d0b5c 100644 --- a/test/unit/descriptor/extrainfo_descriptor.py +++ b/test/unit/descriptor/extrainfo_descriptor.py @@ -375,12 +375,10 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
test_entries = ( '', - '2012-05-03 ', '2012-05-03', '2012-05-03 12:07:60 (500 s)', - '2012-05-03 12:07:50 (500s)', '2012-05-03 12:07:50 (500 s', - '2012-05-03 12:07:50 (500 )', + '2012-05-03 12:07:50 (500s)', '2012-05-03 12:07:50 (500 s)11', '2012-05-03 12:07:50 (500 s) 277431,12089,0', '2012-05-03 12:07:50 (500 s) 277431,12089,0a,2134', @@ -495,21 +493,21 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw self.assertEqual(datetime.datetime(2012, 5, 3, 12, 7, 50), getattr(desc, end_attr)) self.assertEqual(500, getattr(desc, interval_attr))
- test_entries = ( - '', - '2012-05-03 ', - '2012-05-03', - '2012-05-03 12:07:60 (500 s)', - '2012-05-03 12:07:50 (500s)', - '2012-05-03 12:07:50 (500 s', - '2012-05-03 12:07:50 (500 )', - ) + test_entries = ( + '', + '2012-05-03 ', + '2012-05-03', + '2012-05-03 12:07:60 (500 s)', + '2012-05-03 12:07:50 (500s)', + '2012-05-03 12:07:50 (500 s', + '2012-05-03 12:07:50 (500 )', + )
- for entry in test_entries: - desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True) - desc = self._expect_invalid_attr(desc_text) - self.assertEqual(None, getattr(desc, end_attr)) - self.assertEqual(None, getattr(desc, interval_attr)) + for entry in test_entries: + desc_text = get_relay_extrainfo_descriptor({'entry-stats-end': entry}, content = True) + desc = self._expect_invalid_attr(desc_text) + self.assertEqual(None, desc.entry_stats_end) + self.assertEqual(None, desc.entry_stats_interval)
def test_timestamp_interval_and_value_lines(self): """ @@ -523,35 +521,32 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw interval_attr = base_attr + '_interval' values_attr = base_attr + '_values'
- test_entries = ( - ('', []), - (' ', []), - (' 50,11,5', [50, 11, 5]), - ) + desc = get_relay_extrainfo_descriptor({keyword: '2012-05-03 12:07:50 (500 s) 50,11,5'}) + self.assertEqual(datetime.datetime(2012, 5, 3, 12, 7, 50), getattr(desc, end_attr)) + self.assertEqual(500, getattr(desc, interval_attr)) + self.assertEqual([50, 11, 5], getattr(desc, values_attr))
- for test_values, expected_values in test_entries: - desc = get_relay_extrainfo_descriptor({keyword: '2012-05-03 12:07:50 (500 s)%s' % test_values}) - self.assertEqual(datetime.datetime(2012, 5, 3, 12, 7, 50), getattr(desc, end_attr)) - self.assertEqual(500, getattr(desc, interval_attr)) - self.assertEqual(expected_values, getattr(desc, values_attr)) + for test_value in ('', ' '): + desc = get_relay_extrainfo_descriptor({'write-history': '2012-05-03 12:07:50 (500 s)%s' % test_value}) + self.assertEqual(datetime.datetime(2012, 5, 3, 12, 7, 50), desc.write_history_end) + self.assertEqual(500, desc.write_history_interval) + self.assertEqual([], desc.write_history_values)
- test_entries = ( - '', - '2012-05-03 ', - '2012-05-03', - '2012-05-03 12:07:60 (500 s)', - '2012-05-03 12:07:50 (500s)', - '2012-05-03 12:07:50 (500 s', - '2012-05-03 12:07:50 (500 )', - '2012-05-03 12:07:50 (500 s)11', - ) + test_entries = ( + '', + '2012-05-03', + '2012-05-03 12:07:60 (500 s)', + '2012-05-03 12:07:50 (500s)', + '2012-05-03 12:07:50 (500 s', + '2012-05-03 12:07:50 (500 s)11', + )
- for entry in test_entries: - desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True) - desc = self._expect_invalid_attr(desc_text) - self.assertEqual(None, getattr(desc, end_attr)) - self.assertEqual(None, getattr(desc, interval_attr)) - self.assertEqual(None, getattr(desc, values_attr)) + for entry in test_entries: + desc_text = get_relay_extrainfo_descriptor({'write-history': entry}, content = True) + desc = self._expect_invalid_attr(desc_text) + self.assertEqual(None, desc.write_history_end) + self.assertEqual(None, desc.write_history_interval) + self.assertEqual(None, desc.write_history_values)
def test_port_mapping_lines(self): """
tor-commits@lists.torproject.org