[tor-commits] [stem/master] Don't retry downloading descriptors when we've timed out

atagar at torproject.org atagar at torproject.org
Tue Apr 10 18:31:02 UTC 2018


commit e43a168fd8d8d3f561c3feb4f36af357ec32f848
Author: Damian Johnson <atagar at torproject.org>
Date:   Tue Apr 10 10:20:46 2018 -0700

    Don't retry downloading descriptors when we've timed out
    
    Presently one of our dirauths (tor26) is having some health issues. In
    particular connection resets and timeouts.
    
    This has revealed a couple issues. First is that python doesn't always honor
    its socket timeouts. Though rare, our requests to tor26 sometimes hang for
    hours despite imposing a one minute timeout. Second, we retry requests even
    when our timeout has been reached (making callers effectively wait three times
    as long as they asked).
    
    The first of these issues is still baffling me. Unfortunately I've seen it, but
    can't reliably repro. The second issue however is pretty straight forward so
    fixing it.
---
 docs/change_log.rst            |  1 +
 stem/descriptor/remote.py      | 13 ++++++++-----
 test/unit/descriptor/remote.py | 14 +++++++++++---
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index b0773382..9be7912d 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -57,6 +57,7 @@ The following are only available within Stem's `git repository
   * `Fallback directory v2 support <https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html>`_, which adds *nickname* and *extrainfo*
   * Added zstd and lzma compression support (:spec:`1cb56af`)
   * Added server descriptor's new is_hidden_service_dir attribute
+  * Don't retry downloading descriptors when we've timed out
   * Reduced maximum descriptors fetched by the remote module to match tor's new limit (:trac:`24743`)
   * Consensus **shared_randomness_*_reveal_count** attributes undocumented, and unavailable if retrieved before their corresponding shared_randomness_*_value attribute (:trac:`25046`)
   * Allow 'proto' line to have blank values (:spec:`a8455f4`)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 97fd1e5c..cf1b1628 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -430,7 +430,7 @@ class Query(object):
         self._downloader_thread = threading.Thread(
           name = 'Descriptor Query',
           target = self._download_descriptors,
-          args = (self.retries,)
+          args = (self.retries, self.timeout)
         )
 
         self._downloader_thread.setDaemon(True)
@@ -520,7 +520,7 @@ class Query(object):
 
     return 'http://%s:%i/%s' % (address, dirport, self.resource.lstrip('/'))
 
-  def _download_descriptors(self, retries):
+  def _download_descriptors(self, retries, timeout):
     try:
       use_authority = retries == 0 and self.fall_back_to_authority
       self.download_url = self._pick_url(use_authority)
@@ -531,7 +531,7 @@ class Query(object):
           self.download_url,
           headers = {'Accept-Encoding': ', '.join(self.compression)},
         ),
-        timeout = self.timeout,
+        timeout = timeout,
       )
 
       data = response.read()
@@ -562,9 +562,12 @@ class Query(object):
     except:
       exc = sys.exc_info()[1]
 
-      if retries > 0:
+      if timeout is not None:
+        timeout -= time.time() - self.start_time
+
+      if retries > 0 and (timeout is None or timeout > 0):
         log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc))
-        return self._download_descriptors(retries - 1)
+        return self._download_descriptors(retries - 1, timeout)
       else:
         log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc))
         self.error = exc
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index 05cf1d57..cc4c3997 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -4,6 +4,7 @@ Unit tests for stem.descriptor.remote.
 
 import socket
 import tempfile
+import time
 import unittest
 
 import stem.descriptor.remote
@@ -273,19 +274,26 @@ class TestDescriptorDownloader(unittest.TestCase):
 
   @patch(URL_OPEN)
   def test_query_with_timeout(self, urlopen_mock):
-    urlopen_mock.side_effect = socket.timeout('connection timed out')
+    def urlopen_call(*args, **kwargs):
+      time.sleep(0.06)
+      raise socket.timeout('connection timed out')
+
+    urlopen_mock.side_effect = urlopen_call
 
     query = stem.descriptor.remote.Query(
       TEST_RESOURCE,
       'server-descriptor 1.0',
       endpoints = [('128.31.0.39', 9131)],
       fall_back_to_authority = False,
-      timeout = 5,
+      timeout = 0.1,
       validate = True,
     )
 
+    # After two requests we'll have reached our total permissable timeout.
+    # Check that we don't make a third.
+
     self.assertRaises(socket.timeout, query.run)
-    self.assertEqual(3, urlopen_mock.call_count)
+    self.assertEqual(2, urlopen_mock.call_count)
 
   @patch(URL_OPEN, _urlopen_mock(TEST_DESCRIPTOR))
   def test_can_iterate_multiple_times(self):



More information about the tor-commits mailing list