commit 6d4cbd2180d11682d7d65b6926c562155907d049
Author: Damian Johnson <atagar@torproject.org>
Date:   Thu Aug 1 19:58:07 2019 -0700

    Download helper utility

    Stem only raises documented exceptions, but urllib makes this difficult in
    that it raises a wide variety of exceptions. Wrap these within a
    DownloadFailed exception that retains the original exception.
---
 stem/descriptor/collector.py  |  76 +++-------------------------
 stem/util/connection.py       | 112 ++++++++++++++++++++++++++++++++++++++++++
 test/integ/util/connection.py |  33 +++++++++++--
 test/unit/util/connection.py  |  43 ++++++++++++++++
 4 files changed, 192 insertions(+), 72 deletions(-)
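As a rough usage sketch (not part of the patch): any urllib or socket failure now surfaces from the helper as a single documented DownloadFailed, which keeps the original exception and its stacktrace. The CollecTor index URL below is just an illustrative target.

  import stem.util.connection

  try:
    # fetch CollecTor's index with a 30 second budget and up to two retries;
    # lower level urllib/socket failures are re-raised as DownloadFailed
    content = stem.util.connection.download(
      'https://collector.torproject.org/index/index.json',
      timeout = 30,
      retries = 2,
    )

    print('downloaded %i bytes' % len(content))
  except stem.util.connection.DownloadFailed as exc:
    print('download of %s failed: %s' % (exc.url, exc))
    print('wrapped %s, original traceback...\n%s' % (type(exc.error).__name__, exc.stacktrace_str))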
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index a78d60c4..d9f159e1 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -54,20 +54,13 @@ import json
 import os
 import re
 import shutil
-import sys
 import tempfile
 import time
 
+import stem.util.connection
 import stem.util.str_tools
 
 from stem.descriptor import Compression, parse_file
-from stem.util import log
-
-try:
-  # account for urllib's change between python 2.x and 3.x
-  import urllib.request as urllib
-except ImportError:
-  import urllib2 as urllib
 
 COLLECTOR_URL = 'https://collector.torproject.org/'
 REFRESH_INDEX_RATE = 3600  # get new index if cached copy is an hour old
@@ -148,12 +141,7 @@ def get_server_descriptors(start = None, end = None, cache_to = None, timeout =
     :class:`~stem.descriptor.server_descriptor.ServerDescriptor` for the
     given time range
 
-  :raises:
-    * **socket.timeout** if our request timed out
-    * **urllib2.URLError** for most request failures
-
-    Note that the urllib2 module may fail with other exception types, in
-    which case we'll pass it along.
+  :raises: :class:`~stem.util.connection.DownloadFailed` if the download fails
   """
 
   for f in get_instance().files('server-descriptor', start, end):
@@ -161,43 +149,6 @@ def get_server_descriptors(start = None, end = None, cache_to = None, timeout =
       yield desc
 
 
-def _download(url, timeout, retries):
-  """
-  Download from the given url.
-
-  :param str url: uncompressed url to download from
-  :param int timeout: timeout when connection becomes idle, no timeout applied
-    if **None**
-  :param int retires: maximum attempts to impose
-
-  :returns: content of the given url
-
-  :raises:
-    * **socket.timeout** if our request timed out
-    * **urllib2.URLError** for most request failures
-
-    Note that the urllib2 module may fail with other exception types, in
-    which case we'll pass it along.
-  """
-
-  start_time = time.time()
-
-  try:
-    return urllib.urlopen(url, timeout = timeout).read()
-  except:
-    exc = sys.exc_info()[1]
-
-    if timeout is not None:
-      timeout -= time.time() - start_time
-
-    if retries > 0 and (timeout is None or timeout > 0):
-      log.debug("Failed to download from CollecTor at '%s' (%i retries remaining): %s" % (url, retries, exc))
-      return _download(url, timeout, retries - 1)
-    else:
-      log.debug("Failed to download from CollecTor at '%s': %s" % (url, exc))
-      raise
-
-
 class File(object):
   """
   File within CollecTor.
@@ -258,11 +209,7 @@ class File(object):
     :raises:
       * **ValueError** if unable to determine the descriptor type
      * **TypeError** if we cannot parse this descriptor type
-      * **socket.timeout** if our request timed out
-      * **urllib2.URLError** for most request failures
-
-      Note that the urllib2 module may fail with other exception types, in
-      which case we'll pass it along.
+      * :class:`~stem.util.connection.DownloadFailed` if the download fails
     """
 
     if descriptor_type is None:
@@ -309,12 +256,7 @@ class File(object):
 
     :returns: **str** with the path we downloaded to
 
-    :raises:
-      * **socket.timeout** if our request timed out
-      * **urllib2.URLError** for most request failures
-
-      Note that the urllib2 module may fail with other exception types, in
-      which case we'll pass it along.
+    :raises: :class:`~stem.util.connection.DownloadFailed` if the download fails
     """
 
     # TODO: If checksums get added to the index we should replace
@@ -334,7 +276,7 @@ class File(object):
     elif os.path.exists(path):
       return path  # file already exists
 
-    response = _download(COLLECTOR_URL + self.path, timeout, retries)
+    response = stem.util.connection.download(COLLECTOR_URL + self.path, timeout, retries)
 
     if decompress:
       response = self.compression.decompress(response)
@@ -441,8 +383,7 @@ class CollecTor(object):
       * **ValueError** if json is malformed
       * **IOError** if unable to decompress
-      * **socket.timeout** if our request timed out
-      * **urllib2.URLError** for most request failures
+      * :class:`~stem.util.connection.DownloadFailed` if the download fails
     """
 
     if not self._cached_index or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE:
@@ -456,7 +397,7 @@ class CollecTor(object):
       extension = compression.extension if compression != Compression.PLAINTEXT else ''
       url = COLLECTOR_URL + 'index/index.json' + extension
-      response = compression.decompress(_download(url, self.timeout, self.retries))
+      response = compression.decompress(stem.util.connection.download(url, self.timeout, self.retries))
 
       self._cached_index = json.loads(stem.util.str_tools._to_unicode(response))
       self._cached_index_at = time.time()
@@ -478,8 +419,7 @@ class CollecTor(object):
       * **ValueError** if json is malformed
       * **IOError** if unable to decompress
-      * **socket.timeout** if our request timed out
-      * **urllib2.URLError** for most request failures
+      * :class:`~stem.util.connection.DownloadFailed` if the download fails
     """
 
     if not self._cached_files or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE:
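For the collector module this means callers only have one documented failure mode to handle. A hedged sketch (the printed descriptor attributes are illustrative):

  import stem.descriptor.collector
  import stem.util.connection

  try:
    # get_server_descriptors() now documents DownloadFailed rather than letting
    # socket.timeout or urllib2.URLError leak through from the download
    for desc in stem.descriptor.collector.get_server_descriptors(timeout = 60):
      print('%s (%s)' % (desc.nickname, desc.fingerprint))
  except stem.util.connection.DownloadFailed as exc:
    print('CollecTor request failed: %s' % exc)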
diff --git a/stem/util/connection.py b/stem/util/connection.py
index c23d74e7..7be7fe09 100644
--- a/stem/util/connection.py
+++ b/stem/util/connection.py
@@ -8,6 +8,10 @@ Connection and networking based utility functions.
 
 ::
 
+  DownloadFailed - Inability to download a resource.
+    +- DownloadTimeout - Download timeout reached.
+
+  download - download from a given url
   get_connections - queries the connections belonging to a given process
   system_resolvers - provides connection resolution methods that are likely to be available
   port_usage - brief description of the common usage for a port
@@ -58,6 +62,10 @@ import collections
 import os
 import platform
 import re
+import socket
+import sys
+import time
+import traceback
 
 import stem.util
 import stem.util.proc
@@ -65,6 +73,12 @@ import stem.util.system
 
 from stem.util import conf, enum, log, str_tools
 
+try:
+  # account for urllib's change between python 2.x and 3.x
+  import urllib.request as urllib
+except ImportError:
+  import urllib2 as urllib
+
 # Connection resolution is risky to log about since it's highly likely to
 # contain sensitive information. That said, it's also difficult to get right in
 # a platform independent fashion. To opt into the logging required to
@@ -162,6 +176,104 @@ class Connection(collections.namedtuple('Connection', ['local_address', 'local_p
   """
 
 
+class DownloadFailed(IOError):
+  """
+  Inability to download a resource. Python's urllib module raises
+  a wide variety of undocumented exceptions (urllib2.URLError,
+  socket.timeout, and others).
+
+  This wraps lower level failures in a common exception type that
+  retains their exception and `stacktrace
+  <https://docs.python.org/3/library/traceback.html>`_.
+
+  .. versionadded:: 1.8.0
+
+  :var str url: url we failed to download from
+  :var Exception error: original urllib exception
+  :var traceback stacktrace: original stacktrace
+  :var str stacktrace_str: string representation of the stacktrace
+  """
+
+  def __init__(self, url, error, stacktrace, message = None):
+    if message is None:
+      # The string representation of exceptions can reside in several places.
+      # urllib.URLError uses a 'reason' attribute that in turn may reference
+      # low level structures such as socket.gaierror, whereas most exceptions
+      # use a 'message' attribute.
+
+      reason = str(error)
+
+      all_str_repr = (
+        getattr(getattr(error, 'reason', None), 'strerror', None),
+        getattr(error, 'reason', None),
+        getattr(error, 'message', None),
+      )
+
+      for str_repr in all_str_repr:
+        if str_repr and isinstance(str_repr, str):
+          reason = str_repr
+          break
+
+      message = 'Failed to download from %s (%s): %s' % (url, type(error).__name__, reason)
+
+    super(DownloadFailed, self).__init__(message)
+
+    self.url = url
+    self.error = error
+    self.stacktrace = stacktrace
+    self.stacktrace_str = ''.join(traceback.format_tb(stacktrace))
+
+
+class DownloadTimeout(DownloadFailed):
+  """
+  Timeout reached while downloading this resource.
+
+  .. versionadded:: 1.8.0
+  """
+
+  def __init__(self, url, error, stacktrace, timeout):
+    super(DownloadTimeout, self).__init__(url, error, stacktrace, 'Failed to download from %s: %0.1f second timeout reached' % (url, timeout))
+
+
+def download(url, timeout = None, retries = None):
+  """
+  Download from the given url.
+
+  .. versionadded:: 1.8.0
+
+  :param str url: uncompressed url to download from
+  :param int timeout: timeout when connection becomes idle, no timeout applied
+    if **None**
+  :param int retries: maximum attempts to impose
+
+  :returns: **bytes** content of the given url
+
+  :raises: :class:`~stem.util.connection.DownloadFailed` if the download fails
+  """
+
+  if retries is None:
+    retries = 0
+
+  start_time = time.time()
+
+  try:
+    return urllib.urlopen(url, timeout = timeout).read()
+  except socket.timeout as exc:
+    raise DownloadTimeout(url, exc, sys.exc_info()[2], timeout)
+  except:
+    exc, stacktrace = sys.exc_info()[1:3]
+
+    if timeout is not None:
+      timeout -= time.time() - start_time
+
+    if retries > 0 and (timeout is None or timeout > 0):
+      log.debug('Failed to download from %s (%i retries remaining): %s' % (url, retries, exc))
+      return download(url, timeout, retries - 1)
+    else:
+      log.debug('Failed to download from %s: %s' % (url, exc))
+      raise DownloadFailed(url, exc, stacktrace)
+
+
 def get_connections(resolver = None, process_pid = None, process_name = None):
   """
   Retrieves a list of the current connections for a given process. This
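To illustrate how DownloadFailed composes its message above: it prefers the wrapped error's reason.strerror, then reason, then message, before falling back to str(error). A sketch with a hand-built URLError (the example.com url is arbitrary):

  import socket
  import sys

  try:
    # account for urllib's change between python 2.x and 3.x
    import urllib.request as urllib
  except ImportError:
    import urllib2 as urllib

  import stem.util.connection

  try:
    # simulate what urlopen() raises when dns resolution fails
    raise urllib.URLError(socket.gaierror(-2, 'Name or service not known'))
  except Exception:
    error, stacktrace = sys.exc_info()[1:3]
    exc = stem.util.connection.DownloadFailed('https://example.com', error, stacktrace)

    # reason.strerror wins, so this prints...
    #   Failed to download from https://example.com (URLError): Name or service not known
    print(str(exc))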
diff --git a/test/integ/util/connection.py b/test/integ/util/connection.py
index 12ce8ac4..4617fe56 100644
--- a/test/integ/util/connection.py
+++ b/test/integ/util/connection.py
@@ -5,11 +5,18 @@ that we're running.
 
 import unittest
 
+import stem.util.connection
 import stem.util.system
 import test.require
 import test.runner
 
-from stem.util.connection import RESOLVER_COMMAND, Resolver, get_connections, system_resolvers
+from stem.util.connection import Resolver
+
+try:
+  # account for urllib's change between python 2.x and 3.x
+  import urllib.request as urllib
+except ImportError:
+  import urllib2 as urllib
 
 
 class TestConnection(unittest.TestCase):
@@ -20,22 +27,40 @@ class TestConnection(unittest.TestCase):
     if test.runner.Torrc.PORT not in runner.get_options():
       self.skipTest('(no control port)')
       return
-    elif resolver not in system_resolvers():
+    elif resolver not in stem.util.connection.system_resolvers():
       self.skipTest('(resolver unavailable on this platform)')
       return
 
     with runner.get_tor_socket():
-      connections = get_connections(resolver, process_pid = runner.get_pid())
+      connections = stem.util.connection.get_connections(resolver, process_pid = runner.get_pid())
 
       for conn in connections:
         if conn.local_address == '127.0.0.1' and conn.local_port == test.runner.CONTROL_PORT:
           return
 
-      resolver_command = RESOLVER_COMMAND[resolver].format(pid = runner.get_pid())
+      resolver_command = stem.util.connection.RESOLVER_COMMAND[resolver].format(pid = runner.get_pid())
       resolver_output = stem.util.system.call(resolver_command)
 
       self.fail('Unable to find our controller connection with %s (%s). Connections found were...\n\n%s\n\nCommand output was...\n\n%s' % (resolver, resolver_command, '\n'.join(map(str, connections)), resolver_output))
 
+  @test.require.only_run_once
+  @test.require.online
+  def test_download(self):
+    response = stem.util.connection.download('https://collector.torproject.org/index/index.json')
+    self.assertTrue(b'"path":"https://collector.torproject.org"' in response)
+
+  @test.require.only_run_once
+  @test.require.online
+  def test_download_failure(self):
+    try:
+      stem.util.connection.download('https://no.such.testing.url')
+      self.fail('expected a stem.util.connection.DownloadFailed to be raised')
+    except stem.util.connection.DownloadFailed as exc:
+      self.assertEqual('Failed to download from https://no.such.testing.url (URLError): Name or service not known', str(exc))
+      self.assertEqual('https://no.such.testing.url', exc.url)
+      self.assertEqual('Name or service not known', exc.error.reason.strerror)
+      self.assertEqual(urllib.URLError, type(exc.error))
+
   def test_connections_by_proc(self):
     self.check_resolver(Resolver.PROC)
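One behavior worth noting from the implementation: retries counts additional attempts (retries = 4 permits up to five urlopen calls, which the unit tests below assert via a mock), and the timeout is an overall budget that shrinks with each failed attempt. An illustrative sketch, using a TEST-NET address that should be unreachable:

  import time

  import stem.util.connection

  start = time.time()

  try:
    # up to three attempts in total, sharing a single ten second budget
    stem.util.connection.download('https://192.0.2.1/', timeout = 10, retries = 2)
  except stem.util.connection.DownloadTimeout as exc:
    print('timed out after %0.1f seconds: %s' % (time.time() - start, exc))
  except stem.util.connection.DownloadFailed as exc:
    print('gave up after %0.1f seconds: %s' % (time.time() - start, exc))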
diff --git a/test/unit/util/connection.py b/test/unit/util/connection.py
index a2162029..57718446 100644
--- a/test/unit/util/connection.py
+++ b/test/unit/util/connection.py
@@ -2,6 +2,7 @@
 Unit tests for the stem.util.connection functions.
 """
 
+import io
 import platform
 import unittest
 
@@ -10,11 +11,20 @@ import stem.util.connection
 from stem.util.connection import Resolver, Connection
 
 try:
+  # account for urllib's change between python 2.x and 3.x
+  import urllib.request as urllib
+except ImportError:
+  import urllib2 as urllib
+
+try:
   # added in python 3.3
   from unittest.mock import Mock, patch
 except ImportError:
   from mock import Mock, patch
 
+URL_OPEN = 'urllib.request.urlopen' if stem.prereq.is_python_3() else 'urllib2.urlopen'
+URL = 'https://example.unit.test.url'
+
 NETSTAT_OUTPUT = """\
 Active Internet connections (w/o servers)
 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
@@ -166,6 +176,39 @@ _tor tor 15843 20* internet stream tcp 0x0 192.168.1.100:36174 -->
 
 
 class TestConnection(unittest.TestCase):
+  @patch(URL_OPEN)
+  def test_download(self, urlopen_mock):
+    urlopen_mock.return_value = io.BytesIO(b'hello')
+
+    self.assertEqual(b'hello', stem.util.connection.download(URL))
+    urlopen_mock.assert_called_with(URL, timeout = None)
+
+  @patch(URL_OPEN)
+  def test_download_failure(self, urlopen_mock):
+    urlopen_mock.side_effect = urllib.URLError('boom')
+
+    try:
+      stem.util.connection.download(URL)
+      self.fail('expected a stem.util.connection.DownloadFailed to be raised')
+    except stem.util.connection.DownloadFailed as exc:
+      self.assertEqual('Failed to download from https://example.unit.test.url (URLError): boom', str(exc))
+      self.assertEqual(URL, exc.url)
+      self.assertEqual('boom', exc.error.reason)
+      self.assertEqual(urllib.URLError, type(exc.error))
+      self.assertTrue('return urllib.urlopen(url, timeout = timeout).read()' in exc.stacktrace_str)
+
+  @patch(URL_OPEN)
+  def test_download_retries(self, urlopen_mock):
+    urlopen_mock.side_effect = urllib.URLError('boom')
+
+    self.assertRaisesRegexp(IOError, 'boom', stem.util.connection.download, URL)
+    self.assertEqual(1, urlopen_mock.call_count)
+
+    urlopen_mock.reset_mock()
+
+    self.assertRaisesRegexp(IOError, 'boom', stem.util.connection.download, URL, retries = 4)
+    self.assertEqual(5, urlopen_mock.call_count)
+
   @patch('os.access')
   @patch('stem.util.system.is_available')
   @patch('stem.util.proc.is_available')
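The unit tests above only exercise URLError. As a similar hedged sketch, a socket.timeout from urlopen surfaces as the DownloadTimeout subclass, whose message is built from the timeout rather than urllib's error text:

  import socket

  import stem.prereq
  import stem.util.connection

  try:
    # added in python 3.3
    from unittest.mock import patch
  except ImportError:
    from mock import patch

  URL_OPEN = 'urllib.request.urlopen' if stem.prereq.is_python_3() else 'urllib2.urlopen'

  with patch(URL_OPEN) as urlopen_mock:
    urlopen_mock.side_effect = socket.timeout('timed out')

    try:
      stem.util.connection.download('https://example.unit.test.url', timeout = 2.0)
    except stem.util.connection.DownloadTimeout as exc:
      print(str(exc))  # Failed to download from https://example.unit.test.url: 2.0 second timeout reached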