[tor-commits] [stem/master] Move compression argument into index method

atagar at torproject.org atagar at torproject.org
Sat Aug 17 20:44:27 UTC 2019


commit 601266ef7576de121d12c6b275df9b2103e336d7
Author: Damian Johnson <atagar at torproject.org>
Date:   Tue Jul 23 14:27:09 2019 -0700

    Move compression argument into index method
    
    Originally I thought that CollecTor offered each file with all compression
    types (like DirPorts), but on reflection that's wrong. The index is the only
    file that's compressed in multiple ways, so this moves the 'compression'
    argument from CollecTor's constructor to the index() method.
    
    Overall this makes things quite a bit cleaner.
---
 stem/descriptor/__init__.py       |  9 ++++---
 stem/descriptor/collector.py      | 54 +++++++++++++++------------------------
 test/unit/descriptor/collector.py | 48 +++++++++++++++++-----------------
 3 files changed, 50 insertions(+), 61 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 2c69aef0..2044c7bf 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -225,14 +225,17 @@ class _Compression(object):
     """
 
     if not self.available:
-      if self.name == 'zstd':
+      if self._name == 'zstd':
         raise ImportError('Decompressing zstd data requires https://pypi.org/project/zstandard/')
-      elif self.name == 'lzma':
+      elif self._name == 'lzma':
         raise ImportError('Decompressing lzma data requires https://docs.python.org/3/library/lzma.html')
       else:
         raise ImportError("'%s' decompression module is unavailable" % self._module_name)
 
-    return self._decompression_func(self._module, content)
+    try:
+      return self._decompression_func(self._module, content)
+    except Exception as exc:
+      raise IOError('Failed to decompress as %s: %s' % (self, exc))
 
   def __str__(self):
     return self._name
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index bb9f1925..754a96b9 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -64,10 +64,6 @@ try:
 except ImportError:
   import urllib2 as urllib
 
-import stem.prereq
-import stem.util.enum
-import stem.util.str_tools
-
 COLLECTOR_URL = 'https://collector.torproject.org/'
 REFRESH_INDEX_RATE = 3600  # get new index if cached copy is an hour old
 
@@ -113,12 +109,11 @@ COLLECTOR_DESC_TYPES = {
 }
 
 
-def _download(url, compression, timeout, retries):
+def _download(url, timeout, retries):
   """
   Download from the given url.
 
   :param str url: uncompressed url to download from
-  :param descriptor.Compression compression: decompression type
   :param int timeout: timeout when connection becomes idle, no timeout applied
     if **None**
   :param int retires: maximum attempts to impose
@@ -135,13 +130,9 @@ def _download(url, compression, timeout, retries):
   """
 
   start_time = time.time()
-  extension = compression.extension if compression not in (None, Compression.PLAINTEXT) else ''
-
-  if not url.endswith(extension):
-    url += extension
 
   try:
-    response = urllib.urlopen(url, timeout = timeout).read()
+    return urllib.urlopen(url, timeout = timeout).read()
   except:
     exc = sys.exc_info()[1]
 
@@ -150,19 +141,11 @@ def _download(url, compression, timeout, retries):
 
     if retries > 0 and (timeout is None or timeout > 0):
       log.debug("Failed to download from CollecTor at '%s' (%i retries remaining): %s" % (url, retries, exc))
-      return _download(url, compression, timeout, retries - 1)
+      return _download(url, timeout, retries - 1)
     else:
       log.debug("Failed to download from CollecTor at '%s': %s" % (url, exc))
       raise
 
-  if compression not in (None, Compression.PLAINTEXT):
-    try:
-      response = compression.decompress(response)
-    except Exception as exc:
-      raise IOError('Unable to decompress %s response from %s: %s' % (compression, url, exc))
-
-  return stem.util.str_tools._to_unicode(response)
-
 
 class File(object):
   """
@@ -262,15 +245,12 @@ class CollecTor(object):
   provided in `an index <https://collector.torproject.org/index/index.json>`_
   that's fetched as required.
 
-  :var descriptor.Compression compression: compression type to
-    download from, if undefiled we'll use the best decompression available
   :var int retries: number of times to attempt the request if downloading it
     fails
   :var float timeout: duration before we'll time out our request
   """
 
-  def __init__(self, compression = 'best', retries = 2, timeout = None):
-    self.compression = Compression.PLAINTEXT
+  def __init__(self, retries = 2, timeout = None):
     self.retries = retries
     self.timeout = timeout
 
@@ -278,18 +258,13 @@ class CollecTor(object):
     self._cached_files = None
     self._cached_index_at = 0
 
-    if compression == 'best':
-      for option in (Compression.LZMA, Compression.BZ2, Compression.GZIP):
-        if option.available:
-          self.compression = option
-          break
-    elif compression is not None:
-      self.compression = compression
-
-  def index(self):
+  def index(self, compression = 'best'):
     """
     Provides the archives available in CollecTor.
 
+    :param descriptor.Compression compression: compression type to
+      download from, if undefined we'll use the best decompression available
+
     :returns: :class:`~stem.descriptor.collector.Index` with the archive
       contents
 
@@ -303,7 +278,18 @@ class CollecTor(object):
     """
 
     if not self._cached_index or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE:
-      response = _download(COLLECTOR_URL + 'index/index.json', self.compression, self.timeout, self.retries)
+      if compression == 'best':
+        for option in (Compression.LZMA, Compression.BZ2, Compression.GZIP, Compression.PLAINTEXT):
+          if option.available:
+            compression = option
+            break
+      elif compression is None:
+        compression = Compression.PLAINTEXT
+
+      extension = compression.extension if compression != Compression.PLAINTEXT else ''
+      url = COLLECTOR_URL + 'index/index.json' + extension
+      response = compression.decompress(_download(url, self.timeout, self.retries))
+
       self._cached_index = json.loads(response)
       self._cached_index_at = time.time()
 
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
index 9d442deb..86641f32 100644
--- a/test/unit/descriptor/collector.py
+++ b/test/unit/descriptor/collector.py
@@ -38,8 +38,8 @@ class TestCollector(unittest.TestCase):
   def test_download_plaintext(self, urlopen_mock):
     urlopen_mock.return_value = io.BytesIO(MINIMAL_INDEX_JSON)
 
-    collector = CollecTor(compression = Compression.PLAINTEXT)
-    self.assertEqual(MINIMAL_INDEX, collector.index())
+    collector = CollecTor()
+    self.assertEqual(MINIMAL_INDEX, collector.index(Compression.PLAINTEXT))
     urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json', timeout = None)
 
   @patch(URL_OPEN)
@@ -51,8 +51,8 @@ class TestCollector(unittest.TestCase):
     import zlib
     urlopen_mock.return_value = io.BytesIO(zlib.compress(MINIMAL_INDEX_JSON))
 
-    collector = CollecTor(compression = Compression.GZIP)
-    self.assertEqual(MINIMAL_INDEX, collector.index())
+    collector = CollecTor()
+    self.assertEqual(MINIMAL_INDEX, collector.index(Compression.GZIP))
     urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json.gz', timeout = None)
 
   @patch(URL_OPEN)
@@ -64,8 +64,8 @@ class TestCollector(unittest.TestCase):
     import bz2
     urlopen_mock.return_value = io.BytesIO(bz2.compress(MINIMAL_INDEX_JSON))
 
-    collector = CollecTor(compression = Compression.BZ2)
-    self.assertEqual(MINIMAL_INDEX, collector.index())
+    collector = CollecTor()
+    self.assertEqual(MINIMAL_INDEX, collector.index(Compression.BZ2))
     urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json.bz2', timeout = None)
 
   @patch(URL_OPEN)
@@ -77,8 +77,8 @@ class TestCollector(unittest.TestCase):
     import lzma
     urlopen_mock.return_value = io.BytesIO(lzma.compress(MINIMAL_INDEX_JSON))
 
-    collector = CollecTor(compression = Compression.LZMA)
-    self.assertEqual(MINIMAL_INDEX, collector.index())
+    collector = CollecTor()
+    self.assertEqual(MINIMAL_INDEX, collector.index(Compression.LZMA))
     urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json.lzma', timeout = None)
 
   @patch(URL_OPEN)
@@ -97,35 +97,35 @@ class TestCollector(unittest.TestCase):
 
   @patch(URL_OPEN, Mock(return_value = io.BytesIO(MINIMAL_INDEX_JSON)))
   def test_index(self):
-    collector = CollecTor(compression = Compression.PLAINTEXT)
-    self.assertEqual(MINIMAL_INDEX, collector.index())
+    collector = CollecTor()
+    self.assertEqual(MINIMAL_INDEX, collector.index(Compression.PLAINTEXT))
 
   @patch(URL_OPEN, Mock(return_value = io.BytesIO(b'not json')))
   def test_index_malformed_json(self):
-    collector = CollecTor(compression = Compression.PLAINTEXT)
+    collector = CollecTor()
 
     if stem.prereq.is_python_3():
-      self.assertRaisesRegexp(ValueError, 'Expecting value: line 1 column 1', collector.index)
+      self.assertRaisesRegexp(ValueError, 'Expecting value: line 1 column 1', collector.index, Compression.PLAINTEXT)
     else:
-      self.assertRaisesRegexp(ValueError, 'No JSON object could be decoded', collector.index)
+      self.assertRaisesRegexp(ValueError, 'No JSON object could be decoded', collector.index, Compression.PLAINTEXT)
 
   def test_index_malformed_compression(self):
     for compression in (Compression.GZIP, Compression.BZ2, Compression.LZMA):
       if not compression.available:
-        next
+        continue
 
       with patch(URL_OPEN, Mock(return_value = io.BytesIO(b'not compressed'))):
-        collector = CollecTor(compression = compression)
-        self.assertRaisesRegexp(IOError, 'Unable to decompress %s response' % compression, collector.index)
+        collector = CollecTor()
+        self.assertRaisesRegexp(IOError, 'Failed to decompress as %s' % compression, collector.index, compression)
 
   @patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT)))
   def test_real_index(self):
-    collector = CollecTor(compression = Compression.PLAINTEXT)
-    self.assertEqual(EXAMPLE_INDEX, collector.index())
+    collector = CollecTor()
+    self.assertEqual(EXAMPLE_INDEX, collector.index(compression = Compression.PLAINTEXT))
 
-  @patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT)))
+  @patch('stem.descriptor.collector.CollecTor.index', Mock(return_value = EXAMPLE_INDEX))
   def test_contents(self):
-    collector = CollecTor(compression = Compression.PLAINTEXT)
+    collector = CollecTor()
     files = collector.files()
 
     self.assertEqual(85, len(files))
@@ -172,9 +172,9 @@ class TestCollector(unittest.TestCase):
     f = File('recent/relay-descriptors/extra-infos/2019-07-03-23-05-00-extra-infos', 1162899, '2019-07-03 02:05')
     self.assertEqual(datetime.datetime(2019, 7, 4, 0, 5, 0), f.end)
 
-  @patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT)))
+  @patch('stem.descriptor.collector.CollecTor.index', Mock(return_value = EXAMPLE_INDEX))
   def test_file_query_by_type(self):
-    collector = CollecTor(compression = Compression.PLAINTEXT)
+    collector = CollecTor()
 
     expected = [
       'archive/relay-descriptors/server-descriptors/server-descriptors-2005-12.tar.xz',
@@ -187,9 +187,9 @@ class TestCollector(unittest.TestCase):
 
     self.assertEqual(expected, map(lambda x: x.path, collector.files(descriptor_type = 'server-descriptor')))
 
-  @patch(URL_OPEN, Mock(return_value = io.BytesIO(EXAMPLE_INDEX_CONTENT)))
+  @patch('stem.descriptor.collector.CollecTor.index', Mock(return_value = EXAMPLE_INDEX))
   def test_file_query_by_date(self):
-    collector = CollecTor(compression = Compression.PLAINTEXT)
+    collector = CollecTor()
 
     self.assertEqual([
       'recent/relay-descriptors/server-descriptors/2019-07-03-02-05-00-server-descriptors',





More information about the tor-commits mailing list