[tor-commits] [stem/master] Stub initial CollecTor module

atagar at torproject.org atagar at torproject.org
Sat Aug 17 20:44:26 UTC 2019


commit ce8474dcf61cdb3800108e3820dadcef545220ee
Author: Damian Johnson <atagar at torproject.org>
Date:   Mon Dec 25 13:10:14 2017 -0800

    Stub initial CollecTor module
    
    Presently doesn't do much. Just starting with url resolution and fetching the
    index.
---
 stem/descriptor/__init__.py        |   1 +
 stem/descriptor/collector.py       | 145 +++++++++++++++++++++++++++++++++++++
 test/integ/descriptor/__init__.py  |   1 +
 test/integ/descriptor/collector.py |  20 +++++
 test/settings.cfg                  |   2 +
 test/unit/descriptor/__init__.py   |   1 +
 test/unit/descriptor/collector.py  |  16 ++++
 7 files changed, 186 insertions(+)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index ef6530ed..4d13ec60 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -113,6 +113,7 @@ except ImportError:
 __all__ = [
   'bandwidth_file',
   'certificate',
+  'collector',
   'export',
   'extrainfo_descriptor',
   'hidden_service_descriptor',
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
new file mode 100644
index 00000000..ca0e6921
--- /dev/null
+++ b/stem/descriptor/collector.py
@@ -0,0 +1,145 @@
+# Copyright 2017, Damian Johnson and The Tor Project
+# See LICENSE for licensing information
+
+"""
+Module for downloading from Tor's descriptor archive, CollecTor...
+
+  https://collector.torproject.org/
+
+This stores descriptors going back in time. If you need to know what the
+network topology looked like at a past point in time, this is the place to go.
+
+With this you can either download and read directly from CollecTor...
+
+::
+
+  import datetime
+  import stem.descriptor.collector
+
+  collector = stem.descriptor.collector.CollecTor()
+  yesterday = datetime.date.today() - datetime.timedelta(1)
+
+  # provide yesterday's exits
+
+  for desc in collector.get_server_descriptors(start = yesterday):
+    if desc.exit_policy.is_exiting_allowed():
+      print('  %s (%s)' % (desc.nickname, desc.fingerprint))
+
+... or download the descriptors to disk and read them later.
+
+::
+
+  import datetime
+  import stem.descriptor.collector
+  import stem.descriptor.reader
+
+  collector = stem.descriptor.collector.CollecTor()
+  yesterday = datetime.date.today() - datetime.timedelta(1)
+
+  collector.download_server_descriptors(
+    destination = '~/descriptor_cache',
+    start = yesterday,
+  ).join()
+
+  reader = stem.descriptor.reader.DescriptorReader('~/descriptor_cache')
+
+  for desc in reader:
+    if desc.exit_policy.is_exiting_allowed():
+      print('  %s (%s)' % (desc.nickname, desc.fingerprint))
+
+.. versionadded:: 1.7.0
+"""
+
+import json
+import time
+
+try:
+  # account for urllib's change between python 2.x and 3.x
+  import urllib.request as urllib
+except ImportError:
+  import urllib2 as urllib
+
+import stem.util.enum
+
+Compression = stem.util.enum.Enum('NONE', 'BZ2', 'GZ', 'XZ')
+
+COLLECTOR_URL = 'https://collector.torproject.org/'
+REFRESH_INDEX_RATE = 3600  # get new index if cached copy is an hour old
+
+COMPRESSION_SUFFIX = {
+  Compression.NONE: '',
+  Compression.BZ2: '.bz2',
+  Compression.GZ: '.gz',
+  Compression.XZ: '.xz',
+}
+
+
+def url(resource, compression = Compression.NONE):
+  """
+  Provides CollecTor url for the given resource.
+
+  :param str resource: resource type of the url
+  :param descriptor.collector.Compression compression: compression type to
+    download from
+
+  :returns: **str** with the CollecTor url
+  """
+
+  if compression not in COMPRESSION_SUFFIX:
+    raise ValueError("'%s' isn't a compression enumeration" % compression)
+
+  # TODO: Not yet sure how to most elegantly map resources to urls. No doubt
+  # this'll change as we add more types.
+
+  if resource == 'index':
+    path = ('index', 'index.json')
+  else:
+    raise ValueError("'%s' isn't a recognized resource type" % resource)
+
+  return ''.join((COLLECTOR_URL, '/'.join(path), COMPRESSION_SUFFIX[compression]))
+
+
+class CollecTor(object):
+  """
+  Downloader for descriptors from CollecTor. The contents of CollecTor are
+  provided in `an index <https://collector.torproject.org/index/index.json>`_
+  that's fetched as required.
+
+  :var descriptor.collector.Compression compression: compression type to
+    download from
+  :var int retries: number of times to attempt the request if downloading it
+    fails
+  :var float timeout: duration before we'll time out our request
+  """
+
+  def __init__(self, compression = Compression.XZ, retries = 2, timeout = None):
+    self.compression = compression
+    self.retries = retries
+    self.timeout = timeout
+
+    self._cached_index = None
+    self._cached_index_at = 0
+
+  def index(self):
+    """
+    Provides the archives available in CollecTor.
+
+    :returns: **dict** with the archive contents
+
+    :raises:
+      If unable to retrieve the index this provides...
+
+        * **ValueError** if the index is malformed
+        * **socket.timeout** if our request timed out
+        * **urllib2.URLError** for most request failures
+    """
+
+    if not self._cached_index or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE:
+      response = urllib.urlopen(url('index', self.compression), timeout = self.timeout).read()
+
+      # TODO: add compression and retry support
+
+      self._cached_index = json.loads(response)
+      self._cached_index_at = time.time()
+
+    return self._cached_index
diff --git a/test/integ/descriptor/__init__.py b/test/integ/descriptor/__init__.py
index 331316a2..2ed1feef 100644
--- a/test/integ/descriptor/__init__.py
+++ b/test/integ/descriptor/__init__.py
@@ -3,6 +3,7 @@ Integration tests for stem.descriptor.* contents.
 """
 
 __all__ = [
+  'collector',
   'extrainfo_descriptor',
   'microdescriptor',
   'networkstatus',
diff --git a/test/integ/descriptor/collector.py b/test/integ/descriptor/collector.py
new file mode 100644
index 00000000..25f5d503
--- /dev/null
+++ b/test/integ/descriptor/collector.py
@@ -0,0 +1,20 @@
+"""
+Integration tests for stem.descriptor.collector.
+"""
+
+import unittest
+
+import test.require
+
+from stem.descriptor.collector import CollecTor, Compression
+
+
+class TestCollector(unittest.TestCase):
+  @test.require.only_run_once
+  @test.require.online
+  def test_index(self):
+    collector = CollecTor(compression = Compression.NONE)
+    index = collector.index()
+
+    self.assertEqual('https://collector.torproject.org', index['path'])
+    self.assertEqual(['archive', 'recent'], [entry['path'] for entry in index['directories']])
diff --git a/test/settings.cfg b/test/settings.cfg
index d422ffa8..6f71a329 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -239,6 +239,7 @@ test.unit_tests
 |test.unit.util.tor_tools.TestTorTools
 |test.unit.util.__init__.TestBaseUtil
 |test.unit.installation.TestInstallation
+|test.unit.descriptor.collector.TestCollector
 |test.unit.descriptor.descriptor.TestDescriptor
 |test.unit.descriptor.export.TestExport
 |test.unit.descriptor.reader.TestDescriptorReader
@@ -309,6 +310,7 @@ test.integ_tests
 |test.integ.connection.connect.TestConnect
 |test.integ.control.base_controller.TestBaseController
 |test.integ.control.controller.TestController
+|test.integ.descriptor.collector.TestCollector
 |test.integ.descriptor.remote.TestDescriptorDownloader
 |test.integ.descriptor.server_descriptor.TestServerDescriptor
 |test.integ.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor
diff --git a/test/unit/descriptor/__init__.py b/test/unit/descriptor/__init__.py
index a2c03f1d..c5cf01e1 100644
--- a/test/unit/descriptor/__init__.py
+++ b/test/unit/descriptor/__init__.py
@@ -6,6 +6,7 @@ import os
 
 __all__ = [
   'bandwidth_file',
+  'collector',
   'export',
   'extrainfo_descriptor',
   'microdescriptor',
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
new file mode 100644
index 00000000..5dc12164
--- /dev/null
+++ b/test/unit/descriptor/collector.py
@@ -0,0 +1,16 @@
+"""
+Unit tests for stem.descriptor.collector.
+"""
+
+import unittest
+
+from stem.descriptor.collector import Compression, url
+
+
+class TestCollector(unittest.TestCase):
+  def test_url(self):
+    self.assertEqual('https://collector.torproject.org/index/index.json', url('index'))
+    self.assertEqual('https://collector.torproject.org/index/index.json', url('index', compression = Compression.NONE))
+    self.assertEqual('https://collector.torproject.org/index/index.json.gz', url('index', compression = Compression.GZ))
+    self.assertEqual('https://collector.torproject.org/index/index.json.bz2', url('index', compression = Compression.BZ2))
+    self.assertEqual('https://collector.torproject.org/index/index.json.xz', url('index', compression = Compression.XZ))





More information about the tor-commits mailing list