[tor-commits] [stem/master] Working zstd support

atagar at torproject.org atagar at torproject.org
Sun Apr 1 04:40:01 UTC 2018


commit 5da64e67948c07a3c509825e6fecef32cd7bbd8d
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Mar 31 21:13:18 2018 -0700

    Working zstd support
    
    Oops, turns out we weren't using the officially suggested zstd python module.
    Moving to the following did the trick...
    
      https://pypi.python.org/pypi/zstandard
---
 stem/descriptor/remote.py | 52 +++++++++++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 9b71be8d..e1190fc7 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -92,8 +92,8 @@ content. For example...
   =============== ===========
   **PLAINTEXT**   Uncompressed data.
   **GZIP**        `GZip compression <https://www.gnu.org/software/gzip/>`_.
-  **ZSTD**        `Zstandard compression <https://www.zstd.net>`_.
-  **LZMA**        `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_.
+  **ZSTD**        `Zstandard compression <https://www.zstd.net>`_, this requires the `zstandard module <https://pypi.python.org/pypi/zstandard>`_.
+  **LZMA**        `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_, this requires the `lzma module <https://docs.python.org/3/library/lzma.html>`_.
   =============== ===========
 """
 
@@ -133,9 +133,18 @@ except ImportError:
   LZMA_SUPPORTED = False
 
 try:
-  # https://pypi.python.org/pypi/zstd
+  # We use the suggested python zstd library...
+  #
+  #   https://pypi.python.org/pypi/zstandard
+  #
+  # Unfortunately this installs as a zstd module which can be confused with...
+  #
+  #   https://pypi.python.org/pypi/zstd
+  #
+  # As such, we check for the specific decompression class we'll need.
+
   import zstd
-  ZSTD_SUPPORTED = True
+  ZSTD_SUPPORTED = hasattr(zstd, 'ZstdDecompressor')
 except ImportError:
   ZSTD_SUPPORTED = False
 
@@ -146,8 +155,8 @@ Compression = stem.util.enum.Enum(
   ('LZMA', 'x-tor-lzma'),
 )
 
-ZSTD_UNAVAILABLE_MSG = 'ZSTD is not yet supported'
-LZMA_UNAVAILABLE_MSG = 'LZMA compression was requested but requires the lzma module, which was added in python 3.3'
+ZSTD_UNAVAILABLE_MSG = 'ZSTD compression requires the zstandard module (https://pypi.python.org/pypi/zstandard)'
+LZMA_UNAVAILABLE_MSG = 'LZMA compression requires the lzma module (https://docs.python.org/3/library/lzma.html)'
 
 # Tor has a limited number of descriptors we can fetch explicitly by their
 # fingerprint or hashes due to a limit on the url length by squid proxies.
@@ -307,9 +316,9 @@ class Query(object):
   /tor/keys/fp/<v3ident1>+<v3ident2>              key certificates for specific authorities
   =============================================== ===========
 
-  **LZMA** compression requires the `lzma module
-  <https://docs.python.org/3/library/lzma.html>`_ which was added in Python
-  3.3.
+  **ZSTD** compression requires `zstandard
+  <https://pypi.python.org/pypi/zstandard>`_, and **LZMA** requires the `lzma
+  module <https://docs.python.org/3/library/lzma.html>`_.
 
   For legacy reasons if our resource has a '.z' suffix then our **compression**
   argument is overwritten with Compression.GZIP.
@@ -367,14 +376,14 @@ class Query(object):
       if isinstance(compression, str):
         compression = [compression]  # caller provided only a single option
 
-      if Compression.LZMA in compression and not LZMA_SUPPORTED:
-        log.log_once('stem.descriptor.remote.lzma_unavailable', log.INFO, LZMA_UNAVAILABLE_MSG)
-        compression.remove(Compression.LZMA)
-
       if Compression.ZSTD in compression and not ZSTD_SUPPORTED:
         log.log_once('stem.descriptor.remote.zstd_unavailable', log.INFO, ZSTD_UNAVAILABLE_MSG)
         compression.remove(Compression.ZSTD)
 
+      if Compression.LZMA in compression and not LZMA_SUPPORTED:
+        log.log_once('stem.descriptor.remote.lzma_unavailable', log.INFO, LZMA_UNAVAILABLE_MSG)
+        compression.remove(Compression.LZMA)
+
       if not compression:
         compression = [Compression.PLAINTEXT]
 
@@ -528,13 +537,22 @@ class Query(object):
       data = response.read()
       encoding = response.info().getheader('Content-Encoding')
 
-      if encoding in (Compression.GZIP, 'deflate'):
-        # The '32' is for automatic header detection...
-        # https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
+      # Tor doesn't include compression headers. As such, when using gzip we
+      # need to add '32' to zlib's wbits for automatic header detection...
+      #
+      #   https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
+      #
+      # ... and with zstd we need to use the streaming API.
 
+      if encoding in (Compression.GZIP, 'deflate'):
         data = zlib.decompress(data, zlib.MAX_WBITS | 32)
       elif encoding == Compression.ZSTD and ZSTD_SUPPORTED:
-        data = zstd.decompress(data)
+        output_buffer = io.BytesIO()
+
+        with zstd.ZstdDecompressor().write_to(output_buffer) as decompressor:
+          decompressor.write(data)
+
+        data = output_buffer.getvalue()
       elif encoding == Compression.LZMA and LZMA_SUPPORTED:
         data = lzma.decompress(data)
 





More information about the tor-commits mailing list