[tor-commits] [stem/master] Use Compression class in stem.descriptor.remote

atagar at torproject.org
Sat Aug 17 20:44:26 UTC 2019


commit 0c84d3cf4dcedd3629e6bb51c607240b1b2257b9
Author: Damian Johnson <atagar at torproject.org>
Date:   Wed Jun 19 15:55:39 2019 -0700

    Use Compression class in stem.descriptor.remote
    
    Now that our new Compression class is in place, deduplicate the remote
    module's decompression code with it.
---
 stem/descriptor/__init__.py    |  9 ++++--
 stem/descriptor/remote.py      | 67 ++++++++++++++++++++++++------------------
 test/unit/descriptor/remote.py | 11 +++----
 3 files changed, 52 insertions(+), 35 deletions(-)
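
The class this commit switches remote.py onto lives in stem/descriptor/__init__.py.
A rough sketch of its use (illustrative only, not part of the diff below; the sample
payload is made up):

    import zlib

    import stem.descriptor

    # Descriptor content as a directory server would serve it when we ask for
    # 'Accept-Encoding: gzip' (MAX_WBITS | 32 accepts both zlib and gzip framing).
    payload = zlib.compress(b'router caerSidi 71.35.133.197 9001 0 0')

    gzip = stem.descriptor.Compression.GZIP

    if gzip.available:
      print(gzip.encoding)             # 'gzip', the Accept-Encoding value
      print(gzip.decompress(payload))  # original descriptor bytes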

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 0b3fda91..2c69aef0 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -225,7 +225,12 @@ class _Compression(object):
     """
 
     if not self.available:
-      raise ImportError("'%s' decompression module is unavailable" % self._module_name)
+      if self.name == 'zstd':
+        raise ImportError('Decompressing zstd data requires https://pypi.org/project/zstandard/')
+      elif self.name == 'lzma':
+        raise ImportError('Decompressing lzma data requires https://docs.python.org/3/library/lzma.html')
+      else:
+        raise ImportError("'%s' decompression module is unavailable" % self._module_name)
 
     return self._decompression_func(self._module, content)
 
@@ -247,7 +252,7 @@ Compression = stem.util.enum.Enum(
   ('GZIP', _Compression('gzip', 'zlib', 'gzip', '.gz', lambda module, content: module.decompress(content, module.MAX_WBITS | 32))),
   ('BZ2', _Compression('bzip2', 'bz2', 'bzip2', '.bz2', lambda module, content: module.decompress(content))),
   ('LZMA', _Compression('lzma', 'lzma', 'x-tor-lzma', '.xz', lambda module, content: module.decompress(content))),
-  ('ZSTD', _Compression('zstd', 'zstd', 'zstd', '.zst', _zstd_decompress)),
+  ('ZSTD', _Compression('zstd', 'zstd', 'x-zstd', '.zst', _zstd_decompress)),
 )
 
 
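For members whose backing module is missing, decompress() now raises the same
descriptive errors that remote.py previously built by hand. A hypothetical check,
assuming the zstandard module is not installed:

    import stem.descriptor

    zstd = stem.descriptor.Compression.ZSTD

    if not zstd.available:
      try:
        zstd.decompress(b'')
      except ImportError as exc:
        print(exc)  # 'Decompressing zstd data requires https://pypi.org/project/zstandard/'
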
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 15f46070..f715d743 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -100,7 +100,6 @@ import random
 import sys
 import threading
 import time
-import zlib
 
 import stem
 import stem.client
@@ -119,6 +118,8 @@ try:
 except ImportError:
   import urllib2 as urllib
 
+# TODO: remove in stem 2.x, replaced with stem.descriptor.Compression
+
 Compression = stem.util.enum.Enum(
   ('PLAINTEXT', 'identity'),
   ('GZIP', 'gzip'),  # can also be 'deflate'
@@ -126,6 +127,13 @@ Compression = stem.util.enum.Enum(
   ('LZMA', 'x-tor-lzma'),
 )
 
+COMPRESSION_MIGRATION = {
+  'identity': stem.descriptor.Compression.PLAINTEXT,
+  'gzip': stem.descriptor.Compression.GZIP,
+  'x-zstd': stem.descriptor.Compression.ZSTD,
+  'x-tor-lzma': stem.descriptor.Compression.LZMA,
+}
+
 # Tor has a limited number of descriptors we can fetch explicitly by their
 # fingerprint or hashes due to a limit on the url length by squid proxies.
 
@@ -364,6 +372,11 @@ class Query(object):
   .. versionchanged:: 1.8.0
      Defaulting to gzip compression rather than plaintext downloads.
 
+  .. versionchanged:: 1.8.0
+     Using :class:`~stem.descriptor.__init__.Compression` for our compression
+     argument, usage of strings or this module's Compression enum is deprecated
+     and will be removed in stem 2.x.
+
   :var str resource: resource being fetched, such as '/tor/server/all'
   :var str descriptor_type: type of descriptors being fetched (for options see
     :func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
@@ -371,7 +384,7 @@ class Query(object):
 
   :var list endpoints: :class:`~stem.DirPort` or :class:`~stem.ORPort` of the
     authority or mirror we're querying, this uses authorities if undefined
-  :var list compression: list of :data:`stem.descriptor.remote.Compression`
+  :var list compression: list of :data:`stem.descriptor.Compression`
     we're willing to accept, when none are mutually supported downloads fall
     back to Compression.PLAINTEXT
   :var int retries: number of times to attempt the request if downloading it
@@ -429,6 +442,19 @@ class Query(object):
       if not compression:
         compression = [Compression.PLAINTEXT]
 
+    # TODO: Normalize from our old compression enum to
+    # stem.descriptor.Compression. This will get removed in Stem 2.x.
+
+    new_compression = []
+
+    for legacy_compression in compression:
+      if isinstance(legacy_compression, stem.descriptor._Compression):
+        new_compression.append(legacy_compression)
+      elif legacy_compression in COMPRESSION_MIGRATION:
+        new_compression.append(COMPRESSION_MIGRATION[legacy_compression])
+      else:
+        raise ValueError("'%s' (%s) is not a recognized type of compression" % (legacy_compression, type(legacy_compression).__name__))
+
     if descriptor_type:
       self.descriptor_type = descriptor_type
     else:
@@ -446,7 +472,7 @@ class Query(object):
           raise ValueError("Endpoints must be an stem.ORPort, stem.DirPort, or two value tuple. '%s' is a %s." % (endpoint, type(endpoint).__name__))
 
     self.resource = resource
-    self.compression = compression
+    self.compression = new_compression
     self.retries = retries
     self.fall_back_to_authority = fall_back_to_authority
 
@@ -1009,7 +1035,7 @@ def _download_from_orport(endpoint, compression, resource):
     with relay.create_circuit() as circ:
       request = '\r\n'.join((
         'GET %s HTTP/1.0' % resource,
-        'Accept-Encoding: %s' % ', '.join(compression),
+        'Accept-Encoding: %s' % ', '.join(map(lambda c: c.encoding, compression)),
         'User-Agent: %s' % stem.USER_AGENT,
       )) + '\r\n\r\n'
 
@@ -1051,7 +1077,7 @@ def _download_from_dirport(url, compression, timeout):
     urllib.Request(
       url,
       headers = {
-        'Accept-Encoding': ', '.join(compression),
+        'Accept-Encoding': ', '.join(map(lambda c: c.encoding, compression)),
         'User-Agent': stem.USER_AGENT,
       }
     ),
@@ -1080,29 +1106,14 @@ def _decompress(data, encoding):
     * **ImportError** if missing the decompression module
   """
 
-  if encoding == Compression.PLAINTEXT:
-    return data
-  elif encoding in (Compression.GZIP, 'deflate'):
-    return zlib.decompress(data, zlib.MAX_WBITS | 32)
-  elif encoding == Compression.ZSTD:
-    if not stem.prereq.is_zstd_available():
-      raise ImportError('Decompressing zstd data requires https://pypi.org/project/zstandard/')
-
-    import zstd
-    output_buffer = io.BytesIO()
-
-    with zstd.ZstdDecompressor().write_to(output_buffer) as decompressor:
-      decompressor.write(data)
-
-    return output_buffer.getvalue()
-  elif encoding == Compression.LZMA:
-    if not stem.prereq.is_lzma_available():
-      raise ImportError('Decompressing lzma data requires https://docs.python.org/3/library/lzma.html')
-
-    import lzma
-    return lzma.decompress(data)
-  else:
-    raise ValueError("'%s' isn't a recognized type of encoding" % encoding)
+  if encoding == 'deflate':
+    return stem.descriptor.Compression.GZIP.decompress(data)
+
+  for compression in stem.descriptor.Compression:
+    if encoding == compression.encoding:
+      return compression.decompress(data)
+
+  raise ValueError("'%s' isn't a recognized type of encoding" % encoding)
 
 
 def _guess_descriptor_type(resource):
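
With this change the Accept-Encoding header is built from each member's encoding
attribute, and _decompress() dispatches on whatever encoding the response carries
rather than through a hand-rolled if/elif chain. A small sketch of both lookups
(the header value and encoding below are illustrative):

    import stem.descriptor

    accepted = [
      stem.descriptor.Compression.PLAINTEXT,
      stem.descriptor.Compression.GZIP,
    ]

    # What remote.py now sends upstream, e.g. 'identity, gzip'
    print('Accept-Encoding: %s' % ', '.join(c.encoding for c in accepted))

    # ... and mapping a response's encoding back to a decompressor, as
    # _decompress() does ('deflate' is special cased to GZIP there).
    encoding = 'gzip'

    for compression in stem.descriptor.Compression:
      if encoding == compression.encoding:
        print('decompressing with %s' % compression.name)
        break
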
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index 8c5e835b..6dbaf43e 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -8,6 +8,7 @@ import time
 import unittest
 
 import stem
+import stem.descriptor
 import stem.descriptor.remote
 import stem.prereq
 import stem.util.str_tools
@@ -181,26 +182,26 @@ class TestDescriptorDownloader(unittest.TestCase):
 
   def test_gzip_url_override(self):
     query = stem.descriptor.remote.Query(TEST_RESOURCE + '.z', compression = Compression.PLAINTEXT, start = False)
-    self.assertEqual([Compression.GZIP], query.compression)
+    self.assertEqual([stem.descriptor.Compression.GZIP], query.compression)
     self.assertEqual(TEST_RESOURCE, query.resource)
 
   def test_zstd_support_check(self):
     with patch('stem.prereq.is_zstd_available', Mock(return_value = True)):
       query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.ZSTD, start = False)
-      self.assertEqual([Compression.ZSTD], query.compression)
+      self.assertEqual([stem.descriptor.Compression.ZSTD], query.compression)
 
     with patch('stem.prereq.is_zstd_available', Mock(return_value = False)):
       query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.ZSTD, start = False)
-      self.assertEqual([Compression.PLAINTEXT], query.compression)
+      self.assertEqual([stem.descriptor.Compression.PLAINTEXT], query.compression)
 
   def test_lzma_support_check(self):
     with patch('stem.prereq.is_lzma_available', Mock(return_value = True)):
       query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.LZMA, start = False)
-      self.assertEqual([Compression.LZMA], query.compression)
+      self.assertEqual([stem.descriptor.Compression.LZMA], query.compression)
 
     with patch('stem.prereq.is_lzma_available', Mock(return_value = False)):
       query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.LZMA, start = False)
-      self.assertEqual([Compression.PLAINTEXT], query.compression)
+      self.assertEqual([stem.descriptor.Compression.PLAINTEXT], query.compression)
 
   @patch(URL_OPEN, _dirport_mock(read_resource('compressed_identity'), encoding = 'identity'))
   def test_compression_plaintext(self):
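
The tests above exercise the deprecation path: Query still accepts this module's
legacy Compression values (plain strings such as 'gzip'), normalizing them into
stem.descriptor.Compression members via COMPRESSION_MIGRATION. A hedged sketch
from the caller's side (the resource is illustrative, and lzma support is assumed
to be present):

    import stem.descriptor
    import stem.descriptor.remote

    query = stem.descriptor.remote.Query(
      '/tor/server/all',
      compression = stem.descriptor.remote.Compression.LZMA,  # legacy value, deprecated
      start = False,
    )

    # Normalized before any request is made. If lzma support were missing this
    # would instead fall back to [Compression.PLAINTEXT], as the tests above check.
    print(query.compression == [stem.descriptor.Compression.LZMA])  # True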