[tor-commits] [stem/master] Avoid excessive recursion when reading descriptors

atagar at torproject.org atagar at torproject.org
Thu Feb 2 05:18:27 UTC 2017


commit 783061dfd7e2f53fdbbc7b21757247b61a0287cd
Author: Damian Johnson <atagar at torproject.org>
Date:   Wed Feb 1 10:41:59 2017 -0800

    Avoid excessive recursion when reading descriptors
    
    Interesting! Our hasattr() calls when getting descriptor attributes recurse.
    Not infinintely (not sure why) but several dozen times, having a huge hit on
    our performance.
    
    ... weird.
    
    Also making a few tweaks to speed up our extrainfo tests. We don't need to be
    quite so exhaustive with the time interval attributes. It's sufficient to test
    all attributes once, then check validation for a single attribute to avoid an
    n^2 assertion proliferation.
    
    Overall on my netbook this drops the extrainfo unit test runtime from 2.9s to
    1.3s (55.2% faster).
---
 stem/descriptor/__init__.py                  | 21 ++++++--
 test/unit/descriptor/extrainfo_descriptor.py | 81 +++++++++++++---------------
 2 files changed, 54 insertions(+), 48 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index c16106d..8c31b86 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -642,22 +642,33 @@ class Descriptor(object):
     return stem.util.str_tools._to_unicode(digest_hash.hexdigest().upper())
 
   def __getattr__(self, name):
+    # We can't use standard hasattr() since it calls this function, recursing.
+    # Doing so works since it stops recursing after several dozen iterations
+    # (not sure why), but horrible in terms of performance.
+
+    def has_attr():
+      try:
+        super(Descriptor, self).__getattribute__(name)
+        return True
+      except:
+        return False
+
     # If an attribute we should have isn't present it means either...
     #
     #   a. we still need to lazy load this
     #   b. we read the whole descriptor but it wasn't present, so needs the default
 
-    if name in self.ATTRIBUTES and not hasattr(self, name):
+    if name in self.ATTRIBUTES and not has_attr():
       default, parsing_function = self.ATTRIBUTES[name]
 
       if self._lazy_loading:
         try:
           parsing_function(self, self._entries)
         except (ValueError, KeyError):
-          try:
-            # despite having a validation failure check to see if we set something
-            return super(Descriptor, self).__getattribute__(name)
-          except AttributeError:
+          # Despite having a validation failure we might've set something. If
+          # not then set the default.
+
+          if not has_attr():
             setattr(self, name, _copy(default))
       else:
         setattr(self, name, _copy(default))
diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py
index 8dc4130..79d0b5c 100644
--- a/test/unit/descriptor/extrainfo_descriptor.py
+++ b/test/unit/descriptor/extrainfo_descriptor.py
@@ -375,12 +375,10 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
 
     test_entries = (
       '',
-      '2012-05-03 ',
       '2012-05-03',
       '2012-05-03 12:07:60 (500 s)',
-      '2012-05-03 12:07:50 (500s)',
       '2012-05-03 12:07:50 (500 s',
-      '2012-05-03 12:07:50 (500 )',
+      '2012-05-03 12:07:50 (500s)',
       '2012-05-03 12:07:50 (500 s)11',
       '2012-05-03 12:07:50 (500 s) 277431,12089,0',
       '2012-05-03 12:07:50 (500 s) 277431,12089,0a,2134',
@@ -495,21 +493,21 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
       self.assertEqual(datetime.datetime(2012, 5, 3, 12, 7, 50), getattr(desc, end_attr))
       self.assertEqual(500, getattr(desc, interval_attr))
 
-      test_entries = (
-        '',
-        '2012-05-03 ',
-        '2012-05-03',
-        '2012-05-03 12:07:60 (500 s)',
-        '2012-05-03 12:07:50 (500s)',
-        '2012-05-03 12:07:50 (500 s',
-        '2012-05-03 12:07:50 (500 )',
-      )
+    test_entries = (
+      '',
+      '2012-05-03 ',
+      '2012-05-03',
+      '2012-05-03 12:07:60 (500 s)',
+      '2012-05-03 12:07:50 (500s)',
+      '2012-05-03 12:07:50 (500 s',
+      '2012-05-03 12:07:50 (500 )',
+    )
 
-      for entry in test_entries:
-        desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True)
-        desc = self._expect_invalid_attr(desc_text)
-        self.assertEqual(None, getattr(desc, end_attr))
-        self.assertEqual(None, getattr(desc, interval_attr))
+    for entry in test_entries:
+      desc_text = get_relay_extrainfo_descriptor({'entry-stats-end': entry}, content = True)
+      desc = self._expect_invalid_attr(desc_text)
+      self.assertEqual(None, desc.entry_stats_end)
+      self.assertEqual(None, desc.entry_stats_interval)
 
   def test_timestamp_interval_and_value_lines(self):
     """
@@ -523,35 +521,32 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
       interval_attr = base_attr + '_interval'
       values_attr = base_attr + '_values'
 
-      test_entries = (
-        ('', []),
-        (' ', []),
-        (' 50,11,5', [50, 11, 5]),
-      )
+      desc = get_relay_extrainfo_descriptor({keyword: '2012-05-03 12:07:50 (500 s) 50,11,5'})
+      self.assertEqual(datetime.datetime(2012, 5, 3, 12, 7, 50), getattr(desc, end_attr))
+      self.assertEqual(500, getattr(desc, interval_attr))
+      self.assertEqual([50, 11, 5], getattr(desc, values_attr))
 
-      for test_values, expected_values in test_entries:
-        desc = get_relay_extrainfo_descriptor({keyword: '2012-05-03 12:07:50 (500 s)%s' % test_values})
-        self.assertEqual(datetime.datetime(2012, 5, 3, 12, 7, 50), getattr(desc, end_attr))
-        self.assertEqual(500, getattr(desc, interval_attr))
-        self.assertEqual(expected_values, getattr(desc, values_attr))
+    for test_value in ('', ' '):
+      desc = get_relay_extrainfo_descriptor({'write-history': '2012-05-03 12:07:50 (500 s)%s' % test_value})
+      self.assertEqual(datetime.datetime(2012, 5, 3, 12, 7, 50), desc.write_history_end)
+      self.assertEqual(500, desc.write_history_interval)
+      self.assertEqual([], desc.write_history_values)
 
-      test_entries = (
-        '',
-        '2012-05-03 ',
-        '2012-05-03',
-        '2012-05-03 12:07:60 (500 s)',
-        '2012-05-03 12:07:50 (500s)',
-        '2012-05-03 12:07:50 (500 s',
-        '2012-05-03 12:07:50 (500 )',
-        '2012-05-03 12:07:50 (500 s)11',
-      )
+    test_entries = (
+      '',
+      '2012-05-03',
+      '2012-05-03 12:07:60 (500 s)',
+      '2012-05-03 12:07:50 (500s)',
+      '2012-05-03 12:07:50 (500 s',
+      '2012-05-03 12:07:50 (500 s)11',
+    )
 
-      for entry in test_entries:
-        desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True)
-        desc = self._expect_invalid_attr(desc_text)
-        self.assertEqual(None, getattr(desc, end_attr))
-        self.assertEqual(None, getattr(desc, interval_attr))
-        self.assertEqual(None, getattr(desc, values_attr))
+    for entry in test_entries:
+      desc_text = get_relay_extrainfo_descriptor({'write-history': entry}, content = True)
+      desc = self._expect_invalid_attr(desc_text)
+      self.assertEqual(None, desc.write_history_end)
+      self.assertEqual(None, desc.write_history_interval)
+      self.assertEqual(None, desc.write_history_values)
 
   def test_port_mapping_lines(self):
     """



More information about the tor-commits mailing list