commit 4a878f05489fcf680fb3e390363cc9da6985210a Author: Damian Johnson atagar@torproject.org Date: Tue Jun 4 09:51:09 2019 -0700
Integ tests for stem.descriptor.remote hang
Our Serge and tor26 directory authorities intentionally break their DirPort's ability to respond to uncompressed descriptor requests in an effort to mess with DoS attackers. Unfortunately doing so messes with us as well.
For example, the following is a curl request I gave up on after 90 seconds...
% curl 86.59.21.38:80/tor/status-vote/current/consensus > /tmp/dump
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 33088    0 33088    0     0    364      0 --:--:--  0:01:30 --:--:--     0
The trouble is that Python's socket module timeout only takes effect when we stop receiving data. However, because these dirauths dribble a few bits at a time, the timeout never triggers, effectively causing our tests to hang.
I adjusted DocTor a while ago to avoid these sand traps, but a couple of other issues remained...
* One of our integ tests exercised downloading the consensus from each dirauth. Changed it to avoid the problematic dirauths.
* The DescriptorDownloader populated a default 'endpoints' variable, which caused us to short-circuit the code that skips these dirauths. Fixed that as well.
With this, 'run_tests.py --integ --target ONLINE' should no longer hang. --- stem/descriptor/remote.py | 24 ++++++++++++++++++------ test/integ/descriptor/remote.py | 3 +++ 2 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py index ab2fc091..b2f066a3 100644 --- a/stem/descriptor/remote.py +++ b/stem/descriptor/remote.py @@ -140,6 +140,16 @@ SINGLETON_DOWNLOADER = None
DETACHED_SIGNATURE_TYPE = 'detached-signature'
+# Some authorities intentionally break their DirPort to discourage DOS. In +# particular they throttle the rate to such a degree that requests can take +# hours to complete. Unfortunately Python's socket timeouts only kick in +# when we stop receiving data, so these 'sandtraps' cause our downloads to +# hang pretty much indefinitely. +# +# Best we can do is simply avoid attempting to use them in the first place. + +DIR_PORT_BLACKLIST = ('tor26', 'Serge') +
def get_instance(): """ @@ -560,11 +570,12 @@ class Query(object): :param bool use_authority: ignores our endpoints and uses a directory authority instead
- :returns: **str** for the url being queried by this request + :returns: :class:`stem.Endpoint` for the location to be downloaded + from by this request """
if use_authority or not self.endpoints: - picked = random.choice([auth for auth in stem.directory.Authority.from_cache().values() if auth.nickname not in ('tor26', 'Serge')]) + picked = random.choice([auth for auth in stem.directory.Authority.from_cache().values() if auth.nickname not in DIR_PORT_BLACKLIST]) return stem.DirPort(picked.address, picked.dir_port) else: return random.choice(self.endpoints) @@ -575,15 +586,17 @@ class Query(object): endpoint = self._pick_endpoint(use_authority = retries == 0 and self.fall_back_to_authority)
if isinstance(endpoint, stem.ORPort): + downloaded_from = 'ORPort %s:%s (resource %s)' % (endpoint.address, endpoint.port, self.resource) self.content, self.reply_headers = _download_from_orport(endpoint, self.compression, self.resource) elif isinstance(endpoint, stem.DirPort): self.download_url = 'http://%s:%i/%s' % (endpoint.address, endpoint.port, self.resource.lstrip('/')) + downloaded_from = self.download_url self.content, self.reply_headers = _download_from_dirport(self.download_url, self.compression, timeout) else: raise ValueError("BUG: endpoints can only be ORPorts or DirPorts, '%s' was a %s" % (endpoint, type(endpoint).__name__))
self.runtime = time.time() - self.start_time - log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime)) + log.trace("Descriptors retrieved from %s in %0.2fs" % (downloaded_from, self.runtime)) except: exc = sys.exc_info()[1]
@@ -615,8 +628,7 @@ class DescriptorDownloader(object): def __init__(self, use_mirrors = False, **default_args): self._default_args = default_args
- directories = list(stem.directory.Authority.from_cache().values()) - self._endpoints = [(directory.address, directory.dir_port) for directory in directories] + self._endpoints = None
if use_mirrors: try: @@ -637,7 +649,7 @@ class DescriptorDownloader(object): :raises: **Exception** if unable to determine the directory mirrors """
- directories = stem.directory.Authority.from_cache().values() + directories = [auth for auth in stem.directory.Authority.from_cache().values() if auth.nickname not in DIR_PORT_BLACKLIST] new_endpoints = set([(directory.address, directory.dir_port) for directory in directories])
consensus = list(self.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run())[0] diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py index 41fded2c..5788f322 100644 --- a/test/integ/descriptor/remote.py +++ b/test/integ/descriptor/remote.py @@ -102,6 +102,9 @@ class TestDescriptorDownloader(unittest.TestCase): queries = []
for nickname, authority in stem.directory.Authority.from_cache().items(): + if nickname in stem.descriptor.remote.DIR_PORT_BLACKLIST: + continue + queries.append((stem.descriptor.remote.Query( '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', 'server-descriptor 1.0',
tor-commits@lists.torproject.org