commit e43a168fd8d8d3f561c3feb4f36af357ec32f848 Author: Damian Johnson atagar@torproject.org Date: Tue Apr 10 10:20:46 2018 -0700
Don't retry downloading descriptors when we've timed out
Presently one of our dirauths (tor26) is having some health issues. In particular connection resets and timeouts.
This has revealed a couple issues. First is that python doesn't always honor its socket timeouts. Though rare, our requests to tor26 sometimes hang for hours despite imposing a one minute timeout. Second, we retry requests even when our timeout has been reached (making callers effectively wait three times as long as they asked).
The first of these issues is still baffling me. Unfortunately I've seen it, but can't reliably repro. The second issue however is pretty straight forward so fixing it. --- docs/change_log.rst | 1 + stem/descriptor/remote.py | 13 ++++++++----- test/unit/descriptor/remote.py | 14 +++++++++++--- 3 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst index b0773382..9be7912d 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -57,6 +57,7 @@ The following are only available within Stem's `git repository * `Fallback directory v2 support https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html`_, which adds *nickname* and *extrainfo* * Added zstd and lzma compression support (:spec:`1cb56af`) * Added server descriptor's new is_hidden_service_dir attribute + * Don't retry downloading descriptors when we've timed out * Reduced maximum descriptors fetched by the remote module to match tor's new limit (:trac:`24743`) * Consensus **shared_randomness_*_reveal_count** attributes undocumented, and unavailable if retrieved before their corresponding shared_randomness_*_value attribute (:trac:`25046`) * Allow 'proto' line to have blank values (:spec:`a8455f4`) diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py index 97fd1e5c..cf1b1628 100644 --- a/stem/descriptor/remote.py +++ b/stem/descriptor/remote.py @@ -430,7 +430,7 @@ class Query(object): self._downloader_thread = threading.Thread( name = 'Descriptor Query', target = self._download_descriptors, - args = (self.retries,) + args = (self.retries, self.timeout) )
self._downloader_thread.setDaemon(True) @@ -520,7 +520,7 @@ class Query(object):
return 'http://%s:%i/%s' % (address, dirport, self.resource.lstrip('/'))
- def _download_descriptors(self, retries): + def _download_descriptors(self, retries, timeout): try: use_authority = retries == 0 and self.fall_back_to_authority self.download_url = self._pick_url(use_authority) @@ -531,7 +531,7 @@ class Query(object): self.download_url, headers = {'Accept-Encoding': ', '.join(self.compression)}, ), - timeout = self.timeout, + timeout = timeout, )
data = response.read() @@ -562,9 +562,12 @@ class Query(object): except: exc = sys.exc_info()[1]
- if retries > 0: + if timeout is not None: + timeout -= time.time() - self.start_time + + if retries > 0 and (timeout is None or timeout > 0): log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc)) - return self._download_descriptors(retries - 1) + return self._download_descriptors(retries - 1, timeout) else: log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc)) self.error = exc diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py index 05cf1d57..cc4c3997 100644 --- a/test/unit/descriptor/remote.py +++ b/test/unit/descriptor/remote.py @@ -4,6 +4,7 @@ Unit tests for stem.descriptor.remote.
import socket import tempfile +import time import unittest
import stem.descriptor.remote @@ -273,19 +274,26 @@ class TestDescriptorDownloader(unittest.TestCase):
@patch(URL_OPEN) def test_query_with_timeout(self, urlopen_mock): - urlopen_mock.side_effect = socket.timeout('connection timed out') + def urlopen_call(*args, **kwargs): + time.sleep(0.06) + raise socket.timeout('connection timed out') + + urlopen_mock.side_effect = urlopen_call
query = stem.descriptor.remote.Query( TEST_RESOURCE, 'server-descriptor 1.0', endpoints = [('128.31.0.39', 9131)], fall_back_to_authority = False, - timeout = 5, + timeout = 0.1, validate = True, )
+ # After two requests we'll have reached our total permissable timeout. + # Check that we don't make a third. + self.assertRaises(socket.timeout, query.run) - self.assertEqual(3, urlopen_mock.call_count) + self.assertEqual(2, urlopen_mock.call_count)
@patch(URL_OPEN, _urlopen_mock(TEST_DESCRIPTOR)) def test_can_iterate_multiple_times(self):
tor-commits@lists.torproject.org