[stem/master] Working zstd support

commit 5da64e67948c07a3c509825e6fecef32cd7bbd8d Author: Damian Johnson <atagar@torproject.org> Date: Sat Mar 31 21:13:18 2018 -0700 Working zstd support Oops, turns out we weren't using the officially suggested zstd python module. Moving to the following did the trick... https://pypi.python.org/pypi/zstandard --- stem/descriptor/remote.py | 52 +++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py index 9b71be8d..e1190fc7 100644 --- a/stem/descriptor/remote.py +++ b/stem/descriptor/remote.py @@ -92,8 +92,8 @@ content. For example... =============== =========== **PLAINTEXT** Uncompressed data. **GZIP** `GZip compression <https://www.gnu.org/software/gzip/>`_. - **ZSTD** `Zstandard compression <https://www.zstd.net>`_. - **LZMA** `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_. + **ZSTD** `Zstandard compression <https://www.zstd.net>`_, this requires the `zstandard module <https://pypi.python.org/pypi/zstandard>`_. + **LZMA** `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_, this requires the `lzma module <https://docs.python.org/3/library/lzma.html>`_. =============== =========== """ @@ -133,9 +133,18 @@ except ImportError: LZMA_SUPPORTED = False try: - # https://pypi.python.org/pypi/zstd + # We use the suggested python zstd library... + # + # https://pypi.python.org/pypi/zstandard + # + # Unfortunately this installs as a zstd module which can be confused with... + # + # https://pypi.python.org/pypi/zstd + # + # As such checking for the specific decompression class we'll need. 
+ import zstd - ZSTD_SUPPORTED = True + ZSTD_SUPPORTED = hasattr(zstd, 'ZstdDecompressor') except ImportError: ZSTD_SUPPORTED = False @@ -146,8 +155,8 @@ Compression = stem.util.enum.Enum( ('LZMA', 'x-tor-lzma'), ) -ZSTD_UNAVAILABLE_MSG = 'ZSTD is not yet supported' -LZMA_UNAVAILABLE_MSG = 'LZMA compression was requested but requires the lzma module, which was added in python 3.3' +ZSTD_UNAVAILABLE_MSG = 'ZSTD compression requires the zstandard module (https://pypi.python.org/pypi/zstandard)' +LZMA_UNAVAILABLE_MSG = 'LZMA compression requires the lzma module (https://docs.python.org/3/library/lzma.html)' # Tor has a limited number of descriptors we can fetch explicitly by their # fingerprint or hashes due to a limit on the url length by squid proxies. @@ -307,9 +316,9 @@ class Query(object): /tor/keys/fp/<v3ident1>+<v3ident2> key certificates for specific authorities =============================================== =========== - **LZMA** compression requires the `lzma module - <https://docs.python.org/3/library/lzma.html>`_ which was added in Python - 3.3. + **ZSTD** compression requires `zstandard + <https://pypi.python.org/pypi/zstandard>`_, and **LZMA** requires the `lzma + module <https://docs.python.org/3/library/lzma.html>`_. For legacy reasons if our resource has a '.z' suffix then our **compression** argument is overwritten with Compression.GZIP. 
@@ -367,14 +376,14 @@ class Query(object): if isinstance(compression, str): compression = [compression] # caller provided only a single option - if Compression.LZMA in compression and not LZMA_SUPPORTED: - log.log_once('stem.descriptor.remote.lzma_unavailable', log.INFO, LZMA_UNAVAILABLE_MSG) - compression.remove(Compression.LZMA) - if Compression.ZSTD in compression and not ZSTD_SUPPORTED: log.log_once('stem.descriptor.remote.zstd_unavailable', log.INFO, ZSTD_UNAVAILABLE_MSG) compression.remove(Compression.ZSTD) + if Compression.LZMA in compression and not LZMA_SUPPORTED: + log.log_once('stem.descriptor.remote.lzma_unavailable', log.INFO, LZMA_UNAVAILABLE_MSG) + compression.remove(Compression.LZMA) + if not compression: compression = [Compression.PLAINTEXT] @@ -528,13 +537,22 @@ class Query(object): data = response.read() encoding = response.info().getheader('Content-Encoding') - if encoding in (Compression.GZIP, 'deflate'): - # The '32' is for automatic header detection... - # https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompr... + # Tor doesn't include compression headers. As such when using gzip we + # need to include '32' for automatic header detection... + # + # https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompr... + # + # ... and with zstd we need to use the streaming API. + if encoding in (Compression.GZIP, 'deflate'): data = zlib.decompress(data, zlib.MAX_WBITS | 32) elif encoding == Compression.ZSTD and ZSTD_SUPPORTED: - data = zstd.decompress(data) + output_buffer = io.BytesIO() + + with zstd.ZstdDecompressor().write_to(output_buffer) as decompressor: + decompressor.write(data) + + data = output_buffer.getvalue() elif encoding == Compression.LZMA and LZMA_SUPPORTED: data = lzma.decompress(data)
participants (1)
-
atagar@torproject.org