commit 601266ef7576de121d12c6b275df9b2103e336d7 Author: Damian Johnson atagar@torproject.org Date: Tue Jul 23 14:27:09 2019 -0700
Move compression argument into index method
Originally I thought that CollecTor offered every file in all compression types (like DirPorts do), but on reflection that's wrong. The index is the only file that's compressed in multiple ways, so this moves the 'compression' argument from CollecTor's constructor to the index() method.
Overall this makes things quite a bit cleaner. --- stem/descriptor/__init__.py | 9 ++++--- stem/descriptor/collector.py | 54 +++++++++++++++------------------------ test/unit/descriptor/collector.py | 48 +++++++++++++++++----------------- 3 files changed, 50 insertions(+), 61 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 2c69aef0..2044c7bf 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -225,14 +225,17 @@ class _Compression(object): """
if not self.available: - if self.name == 'zstd': + if self._name == 'zstd': raise ImportError('Decompressing zstd data requires https://pypi.org/project/zstandard/') - elif self.name == 'lzma': + elif self._name == 'lzma': raise ImportError('Decompressing lzma data requires https://docs.python.org/3/library/lzma.html') else: raise ImportError("'%s' decompression module is unavailable" % self._module_name)
- return self._decompression_func(self._module, content) + try: + return self._decompression_func(self._module, content) + except Exception as exc: + raise IOError('Failed to decompress as %s: %s' % (self, exc))
def __str__(self): return self._name diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py index bb9f1925..754a96b9 100644 --- a/stem/descriptor/collector.py +++ b/stem/descriptor/collector.py @@ -64,10 +64,6 @@ try: except ImportError: import urllib2 as urllib
-import stem.prereq -import stem.util.enum -import stem.util.str_tools - COLLECTOR_URL = 'https://collector.torproject.org/' REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old
@@ -113,12 +109,11 @@ COLLECTOR_DESC_TYPES = { }
-def _download(url, compression, timeout, retries): +def _download(url, timeout, retries): """ Download from the given url.
:param str url: uncompressed url to download from - :param descriptor.Compression compression: decompression type :param int timeout: timeout when connection becomes idle, no timeout applied if **None** :param int retires: maximum attempts to impose @@ -135,13 +130,9 @@ def _download(url, compression, timeout, retries): """
start_time = time.time() - extension = compression.extension if compression not in (None, Compression.PLAINTEXT) else '' - - if not url.endswith(extension): - url += extension
try: - response = urllib.urlopen(url, timeout = timeout).read() + return urllib.urlopen(url, timeout = timeout).read() except: exc = sys.exc_info()[1]
@@ -150,19 +141,11 @@ def _download(url, compression, timeout, retries):
if retries > 0 and (timeout is None or timeout > 0): log.debug("Failed to download from CollecTor at '%s' (%i retries remaining): %s" % (url, retries, exc)) - return _download(url, compression, timeout, retries - 1) + return _download(url, timeout, retries - 1) else: log.debug("Failed to download from CollecTor at '%s': %s" % (url, exc)) raise
- if compression not in (None, Compression.PLAINTEXT): - try: - response = compression.decompress(response) - except Exception as exc: - raise IOError('Unable to decompress %s response from %s: %s' % (compression, url, exc)) - - return stem.util.str_tools._to_unicode(response) -
class File(object): """ @@ -262,15 +245,12 @@ class CollecTor(object): provided in `an index https://collector.torproject.org/index/index.json`_ that's fetched as required.
- :var descriptor.Compression compression: compression type to - download from, if undefiled we'll use the best decompression available :var int retries: number of times to attempt the request if downloading it fails :var float timeout: duration before we'll time out our request """
- def __init__(self, compression = 'best', retries = 2, timeout = None): - self.compression = Compression.PLAINTEXT + def __init__(self, retries = 2, timeout = None): self.retries = retries self.timeout = timeout
@@ -278,18 +258,13 @@ class CollecTor(object): self._cached_files = None self._cached_index_at = 0
- if compression == 'best': - for option in (Compression.LZMA, Compression.BZ2, Compression.GZIP): - if option.available: - self.compression = option - break - elif compression is not None: - self.compression = compression - - def index(self): + def index(self, compression = 'best'): """ Provides the archives available in CollecTor.
+ :param descriptor.Compression compression: compression type to + download from, if undefiled we'll use the best decompression available + :returns: :class:`~stem.descriptor.collector.Index` with the archive contents
@@ -303,7 +278,18 @@ class CollecTor(object): """
if not self._cached_index or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE: - response = _download(COLLECTOR_URL + 'index/index.json', self.compression, self.timeout, self.retries) + if compression == 'best': + for option in (Compression.LZMA, Compression.BZ2, Compression.GZIP, Compression.PLAINTEXT): + if option.available: + compression = option + break + elif compression is None: + compression = Compression.PLAINTEXT + + extension = compression.extension if compression != Compression.PLAINTEXT else '' + url = COLLECTOR_URL + 'index/index.json' + extension + response = compression.decompress(_download(url, self.timeout, self.retries)) + self._cached_index = json.loads(response) self._cached_index_at = time.time()
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py index 9d442deb..86641f32 100644 --- a/test/unit/descriptor/collector.py +++ b/test/unit/descriptor/collector.py @@ -38,8 +38,8 @@ class TestCollector(unittest.TestCase): def test_download_plaintext(self, urlopen_mock): urlopen_mock.return_value = io.BytesIO(MINIMAL_INDEX_JSON)
- collector = CollecTor(compression = Compression.PLAINTEXT) - self.assertEqual(MINIMAL_INDEX, collector.index()) + collector = CollecTor() + self.assertEqual(MINIMAL_INDEX, collector.index(Compression.PLAINTEXT)) urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json', timeout = None)
@patch(URL_OPEN) @@ -51,8 +51,8 @@ class TestCollector(unittest.TestCase): import zlib urlopen_mock.return_value = io.BytesIO(zlib.compress(MINIMAL_INDEX_JSON))
- collector = CollecTor(compression = Compression.GZIP) - self.assertEqual(MINIMAL_INDEX, collector.index()) + collector = CollecTor() + self.assertEqual(MINIMAL_INDEX, collector.index(Compression.GZIP)) urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json.gz', timeout = None)
@patch(URL_OPEN) @@ -64,8 +64,8 @@ class TestCollector(unittest.TestCase): import bz2 urlopen_mock.return_value = io.BytesIO(bz2.compress(MINIMAL_INDEX_JSON))
- collector = CollecTor(compression = Compression.BZ2) - self.assertEqual(MINIMAL_INDEX, collector.index()) + collector = CollecTor() + self.assertEqual(MINIMAL_INDEX, collector.index(Compression.BZ2)) urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json.bz2', timeout = None)
@patch(URL_OPEN) @@ -77,8 +77,8 @@ class TestCollector(unittest.TestCase): import lzma urlopen_mock.return_value = io.BytesIO(lzma.compress(MINIMAL_INDEX_JSON))
- collector = CollecTor(compression = Compression.LZMA) - self.assertEqual(MINIMAL_INDEX, collector.index()) + collector = CollecTor() + self.assertEqual(MINIMAL_INDEX, collector.index(Compression.LZMA)) urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json.lzma', timeout = None)
@patch(URL_OPEN) @@ -97,35 +97,35 @@ class TestCollector(unittest.TestCase):
@patch(URL_OPEN, Mock(return_value = io.BytesIO(MINIMAL_INDEX_JSON))) def test_index(self): - collector = CollecTor(compression = Compression.PLAINTEXT) - self.assertEqual(MINIMAL_INDEX, collector.index()) + collector = CollecTor() + self.assertEqual(MINIMAL_INDEX, collector.index(Compression.PLAINTEXT))
@patch(URL_OPEN, Mock(return_value = io.BytesIO(b'not json'))) def test_index_malformed_json(self): - collector = CollecTor(compression = Compression.PLAINTEXT) + collector = CollecTor()
if stem.prereq.is_python_3(): - self.assertRaisesRegexp(ValueError, 'Expecting value: line 1 column 1', collector.index) + self.assertRaisesRegexp(ValueError, 'Expecting value: line 1 column 1', collector.index, Compression.PLAINTEXT) else: - self.assertRaisesRegexp(ValueError, 'No JSON object could be decoded', collector.index) + self.assertRaisesRegexp(ValueError, 'No JSON object could be decoded', collector.index, Compression.PLAINTEXT)
def test_index_malformed_compression(self): for compression in (Compression.GZIP, Compression.BZ2, Compression.LZMA): if not compression.available: - next + continue
with patch(URL_OPEN, Mock(return_value = io.BytesIO(b'not compressed'))): - collector = CollecTor(compression = compression) - self.assertRaisesRegexp(IOError, 'Unable to decompress %s response' % compression, collector.index) + collector = CollecTor() + self.assertRaisesRegexp(IOError, 'Failed to decompress as %s' % compression, collector.index, compression)
@patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT))) def test_real_index(self): - collector = CollecTor(compression = Compression.PLAINTEXT) - self.assertEqual(EXAMPLE_INDEX, collector.index()) + collector = CollecTor() + self.assertEqual(EXAMPLE_INDEX, collector.index(compression = Compression.PLAINTEXT))
- @patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT))) + @patch('stem.descriptor.collector.CollecTor.index', Mock(return_value = EXAMPLE_INDEX)) def test_contents(self): - collector = CollecTor(compression = Compression.PLAINTEXT) + collector = CollecTor() files = collector.files()
self.assertEqual(85, len(files)) @@ -172,9 +172,9 @@ class TestCollector(unittest.TestCase): f = File('recent/relay-descriptors/extra-infos/2019-07-03-23-05-00-extra-infos', 1162899, '2019-07-03 02:05') self.assertEqual(datetime.datetime(2019, 7, 4, 0, 5, 0), f.end)
- @patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT))) + @patch('stem.descriptor.collector.CollecTor.index', Mock(return_value = EXAMPLE_INDEX)) def test_file_query_by_type(self): - collector = CollecTor(compression = Compression.PLAINTEXT) + collector = CollecTor()
expected = [ 'archive/relay-descriptors/server-descriptors/server-descriptors-2005-12.tar.xz', @@ -187,9 +187,9 @@ class TestCollector(unittest.TestCase):
self.assertEqual(expected, map(lambda x: x.path, collector.files(descriptor_type = 'server-descriptor')))
- @patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT))) + @patch('stem.descriptor.collector.CollecTor.index', Mock(return_value = EXAMPLE_INDEX)) def test_file_query_by_date(self): - collector = CollecTor(compression = Compression.PLAINTEXT) + collector = CollecTor()
self.assertEqual([ 'recent/relay-descriptors/server-descriptors/2019-07-03-02-05-00-server-descriptors',
tor-commits@lists.torproject.org