commit 9e9458501fa4b2f8819f93ff853c658b9e63366c Author: Damian Johnson <atagar@torproject.org> Date: Tue Jun 18 18:37:05 2019 -0700
Compression class
Both our collector and remote modules need to decompress descriptors, so this adds a small helper class that handles availability checks and decompression. --- stem/descriptor/__init__.py | 79 +++++++++++++++++++++++++++++++ stem/descriptor/collector.py | 77 +++--------------------------- test/integ/descriptor/collector.py | 9 ++-- test/settings.cfg | 3 +- test/unit/descriptor/collector.py | 11 +++-- test/unit/descriptor/compression.py | 39 +++++++++++++++ test/unit/descriptor/data/compressed_bz2 | Bin 0 -> 1691 bytes 7 files changed, 138 insertions(+), 80 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 4d13ec60..0b3fda91 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -11,6 +11,8 @@ Package for parsing and processing descriptor data. parse_file - Parses the descriptors in a file. create_signing_key - Cretes a signing key that can be used for creating descriptors.
+ Compression - method of descriptor decompression + Descriptor - Common parent for all descriptor file types. | |- content - creates the text of a new descriptor | |- create - creates a new descriptor @@ -172,6 +174,83 @@ DocumentHandler = stem.util.enum.UppercaseEnum( )
+class _Compression(object): + """ + Compression method supported by CollecTor. + + :var bool available: **True** if this method of decryption is available, + **False** otherwise + :var str encoding: `http 'Accept-Encoding' parameter https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens`_ + :var str extension: file extension of this compression + + .. versionadded:: 1.8.0 + """ + + def __init__(self, name, module, encoding, extension, decompression_func): + if module is None: + self._module = None + self.available = True + else: + # Compression modules are optional. Usually gzip and bz2 are available, + # but they might be missing if compiling python yourself. As for lzma it + # was added in python 3.3. + + try: + self._module = __import__(module) + self.available = True + except ImportError: + self._module = None + self.available = False + + self.extension = extension + self.encoding = encoding + + self._name = name + self._module_name = module + self._decompression_func = decompression_func + + def decompress(self, content): + """ + Decompresses the given content via this method. + + :param bytes content: content to be decompressed + + :returns: **bytes** with the decompressed content + + :raises: + If unable to decompress this provide... 
+ + * **IOError** if content isn't compressed with this + * **ImportError** if this method if decompression is unavalable + """ + + if not self.available: + raise ImportError("'%s' decompression module is unavailable" % self._module_name) + + return self._decompression_func(self._module, content) + + def __str__(self): + return self._name + + +def _zstd_decompress(module, content): + output_buffer = io.BytesIO() + + with module.ZstdDecompressor().write_to(output_buffer) as decompressor: + decompressor.write(content) + + return output_buffer.getvalue() + + +Compression = stem.util.enum.Enum( + ('PLAINTEXT', _Compression('plaintext', None, 'identity', '.txt', lambda module, content: content)), + ('GZIP', _Compression('gzip', 'zlib', 'gzip', '.gz', lambda module, content: module.decompress(content, module.MAX_WBITS | 32))), + ('BZ2', _Compression('bzip2', 'bz2', 'bzip2', '.bz2', lambda module, content: module.decompress(content))), + ('LZMA', _Compression('lzma', 'lzma', 'x-tor-lzma', '.xz', lambda module, content: module.decompress(content))), + ('ZSTD', _Compression('zstd', 'zstd', 'zstd', '.zst', _zstd_decompress)), +) + + class TypeAnnotation(collections.namedtuple('TypeAnnotation', ['name', 'major_version', 'minor_version'])): """ `Tor metrics type annotation diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py index 21a774e9..b3f99241 100644 --- a/stem/descriptor/collector.py +++ b/stem/descriptor/collector.py @@ -50,10 +50,11 @@ With this you can either download and read directly from CollecTor... .. versionadded:: 1.8.0 """
-import io import json import time
+from stem.descriptor import Compression + try: # account for urllib's change between python 2.x and 3.x import urllib.request as urllib @@ -68,82 +69,18 @@ COLLECTOR_URL = 'https://collector.torproject.org/' REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old
-class Compression(object): - """ - Compression method supported by CollecTor. - - :var bool available: **True** if this method of decryption is available, - **False** otherwise - :var str extension: file extension of this compression - """ - - def __init__(self, module, extension): - # Compression modules are optional. Usually gzip and bz2 are available, but - # they might be missing if compiling python yourself. As for lzma it was - # added in python 3.3. - - try: - self._module = __import__(module) - self.available = True - except ImportError: - self._module = None - self.available = False - - self.extension = extension - self._module_name = module - - def decompress(self, content): - """ - Decompresses the given content via this method. - - :param bytes content: content to be decompressed - - :returns: **bytes** with the decompressed content - - :raises: - If unable to decompress this provide... - - * **IOError** if content isn't compressed with this - * **ImportError** if this method if decompression is unavalable - """ - - if not self.available: - raise ImportError("'%s' decompression module is unavailable" % self) - - if self._module_name == 'gzip': - if stem.prereq.is_python_3(): - return self._module.decompress(content) - else: - # prior to python 3.2 gzip only had GzipFile - return self._module.GzipFile(fileobj = io.BytesIO(content)).read() - elif self._module_name == 'bz2': - return self._module.decompress(content) - elif self._module_name == 'lzma': - return self._module.decompress(content) - else: - raise ImportError('BUG: No implementation for %s decompression' % self) - - def __str__(self): - return self._module_name - - -GZIP = Compression('gzip', '.gz') -BZ2 = Compression('bz2', '.bz2') -LZMA = Compression('lzma', '.xz') - - def url(resource, compression = None): """ Provides CollecTor url for the given resource.
:param str resource: resource type of the url - :param descriptor.collector.Compression compression: compression type to + :param descriptor.Compression compression: compression type to download from
:returns: **str** with the CollecTor url """
- # TODO: Not yet sure how to most elegantly map resources to urls. No doubt + # TODO: Unsure how to most elegantly map resources to urls. No doubt # this'll change as we add more types.
if resource == 'index': @@ -152,7 +89,7 @@ def url(resource, compression = None): raise ValueError("'%s' isn't a recognized resource type" % resource)
suffix = compression.extension if compression else '' - return ''.join((COLLECTOR_URL, '/'.join(path), suffix)) + return COLLECTOR_URL + '/'.join(path) + suffix
class CollecTor(object): @@ -161,7 +98,7 @@ class CollecTor(object): provided in `an index https://collector.torproject.org/index/index.json`_ that's fetched as required.
- :var descriptor.collector.Compression compression: compression type to + :var descriptor.Compression compression: compression type to download from, if undefiled we'll use the best decompression available :var int retries: number of times to attempt the request if downloading it fails @@ -172,7 +109,7 @@ class CollecTor(object): if compression == 'best': self.compression = None
- for option in (LZMA, BZ2, GZIP): + for option in (Compression.LZMA, Compression.BZ2, Compression.GZIP): if option.available: self.compression = option break diff --git a/test/integ/descriptor/collector.py b/test/integ/descriptor/collector.py index 1af329a5..dbb09d5a 100644 --- a/test/integ/descriptor/collector.py +++ b/test/integ/descriptor/collector.py @@ -6,7 +6,8 @@ import unittest
import test.require
-from stem.descriptor.collector import GZIP, BZ2, LZMA, CollecTor +from stem.descriptor import Compression +from stem.descriptor.collector import CollecTor
class TestCollector(unittest.TestCase): @@ -18,17 +19,17 @@ class TestCollector(unittest.TestCase): @test.require.only_run_once @test.require.online def test_index_gzip(self): - self._test_index(GZIP) + self._test_index(Compression.GZIP)
@test.require.only_run_once @test.require.online def test_index_bz2(self): - self._test_index(BZ2) + self._test_index(Compression.BZ2)
@test.require.only_run_once @test.require.online def test_index_lzma(self): - self._test_index(LZMA) + self._test_index(Compression.LZMA)
def _test_index(self, compression): if compression and not compression.available: diff --git a/test/settings.cfg b/test/settings.cfg index 6f71a329..1bdb1a0a 100644 --- a/test/settings.cfg +++ b/test/settings.cfg @@ -239,10 +239,11 @@ test.unit_tests |test.unit.util.tor_tools.TestTorTools |test.unit.util.__init__.TestBaseUtil |test.unit.installation.TestInstallation -|test.unit.descriptor.collector.TestCollector |test.unit.descriptor.descriptor.TestDescriptor +|test.unit.descriptor.compression.TestCompression |test.unit.descriptor.export.TestExport |test.unit.descriptor.reader.TestDescriptorReader +|test.unit.descriptor.collector.TestCollector |test.unit.descriptor.remote.TestDescriptorDownloader |test.unit.descriptor.server_descriptor.TestServerDescriptor |test.unit.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py index a0f8cb2e..b2b464ce 100644 --- a/test/unit/descriptor/collector.py +++ b/test/unit/descriptor/collector.py @@ -7,7 +7,8 @@ import unittest
import stem.prereq
-from stem.descriptor.collector import GZIP, BZ2, LZMA, CollecTor, url +from stem.descriptor import Compression +from stem.descriptor.collector import CollecTor, url
try: # added in python 3.3 @@ -22,9 +23,9 @@ class TestCollector(unittest.TestCase): def test_url(self): self.assertEqual('https://collector.torproject.org/index/index.json', url('index')) self.assertEqual('https://collector.torproject.org/index/index.json', url('index', compression = None)) - self.assertEqual('https://collector.torproject.org/index/index.json.gz', url('index', compression = GZIP)) - self.assertEqual('https://collector.torproject.org/index/index.json.bz2', url('index', compression = BZ2)) - self.assertEqual('https://collector.torproject.org/index/index.json.xz', url('index', compression = LZMA)) + self.assertEqual('https://collector.torproject.org/index/index.json.gz', url('index', compression = Compression.GZIP)) + self.assertEqual('https://collector.torproject.org/index/index.json.bz2', url('index', compression = Compression.BZ2)) + self.assertEqual('https://collector.torproject.org/index/index.json.xz', url('index', compression = Compression.LZMA))
@patch(URL_OPEN, Mock(return_value = io.BytesIO(b'{"index_created":"2017-12-25 21:06","build_revision":"56a303e","path":"https://collector.torproject.org%22%7D'))) def test_index(self): @@ -47,7 +48,7 @@ class TestCollector(unittest.TestCase): self.assertRaisesRegexp(ValueError, 'No JSON object could be decoded', collector.index)
def test_index_malformed_compression(self): - for compression in (GZIP, BZ2, LZMA): + for compression in (Compression.GZIP, Compression.BZ2, Compression.LZMA): with patch(URL_OPEN, Mock(return_value = io.BytesIO(b'not compressed'))): collector = CollecTor(compression = compression) self.assertRaisesRegexp(IOError, 'Unable to decompress response as %s' % compression, collector.index) diff --git a/test/unit/descriptor/compression.py b/test/unit/descriptor/compression.py new file mode 100644 index 00000000..3945bc9c --- /dev/null +++ b/test/unit/descriptor/compression.py @@ -0,0 +1,39 @@ +""" +Unit tests for stem.descriptor.Compression. +""" + +import unittest + +from stem.descriptor import Compression + +from test.unit.descriptor import get_resource + + +class TestCompression(unittest.TestCase): + def test_decompress_plaintext(self): + self._check_file(Compression.PLAINTEXT, 'compressed_identity') + + def test_decompress_gzip(self): + self._check_file(Compression.GZIP, 'compressed_gzip') + + def test_decompress_bz2(self): + self._check_file(Compression.BZ2, 'compressed_bz2') + + def test_decompress_lzma(self): + self._check_file(Compression.LZMA, 'compressed_lzma') + + def test_decompress_zstd(self): + self._check_file(Compression.ZSTD, 'compressed_zstd') + + def _check_file(self, compression, filename): + """ + Decompress one of our 'compressed_*' server descriptors. + """ + + if not compression.available: + self.skipTest('(%s unavailable)' % compression) + return + + with open(get_resource(filename), 'rb') as compressed_file: + content = compression.decompress(compressed_file.read()) + self.assertTrue(content.startswith(b'router moria1 128.31.0.34 9101 0 9131')) diff --git a/test/unit/descriptor/data/compressed_bz2 b/test/unit/descriptor/data/compressed_bz2 new file mode 100644 index 00000000..4d645a71 Binary files /dev/null and b/test/unit/descriptor/data/compressed_bz2 differ
tor-commits@lists.torproject.org