[tor-commits] [stem/master] Add CollecTor to our descriptor tutorial

atagar at torproject.org atagar at torproject.org
Sat Aug 17 20:44:27 UTC 2019


commit 6a44d211342a727b71824825262123aeaf300c99
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Aug 17 13:22:53 2019 -0700

    Add CollecTor to our descriptor tutorial
    
    Replacing our deprecated stem.descriptor.reader example with usage of our new
    collector module.
---
 docs/_static/example/collector_caching.py    | 18 ++++++++++
 docs/_static/example/collector_reading.py    | 10 ++++++
 docs/_static/example/past_descriptors.py     |  5 ---
 docs/api.rst                                 |  1 +
 docs/api/descriptor/collector.rst            |  5 +++
 docs/change_log.rst                          |  3 +-
 docs/contents.rst                            |  1 +
 docs/tutorials/mirror_mirror_on_the_wall.rst | 13 +++++--
 stem/descriptor/collector.py                 | 51 ++++++----------------------
 stem/descriptor/server_descriptor.py         | 17 ++++++----
 test/unit/tutorial.py                        | 18 +++++-----
 11 files changed, 78 insertions(+), 64 deletions(-)

diff --git a/docs/_static/example/collector_caching.py b/docs/_static/example/collector_caching.py
new file mode 100644
index 00000000..bff63c47
--- /dev/null
+++ b/docs/_static/example/collector_caching.py
@@ -0,0 +1,18 @@
+import datetime
+import stem.descriptor
+import stem.descriptor.collector
+
+yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1)
+cache_dir = '~/descriptor_cache/server_desc_today'
+
+collector = stem.descriptor.collector.CollecTor()
+
+for f in collector.files('server-descriptor', start = yesterday):
+  f.download(cache_dir)
+
+# then later...
+
+for f in collector.files('server-descriptor', start = yesterday):
+  for desc in f.read(cache_dir):
+    if desc.exit_policy.is_exiting_allowed():
+      print('  %s (%s)' % (desc.nickname, desc.fingerprint))
diff --git a/docs/_static/example/collector_reading.py b/docs/_static/example/collector_reading.py
new file mode 100644
index 00000000..06cc913a
--- /dev/null
+++ b/docs/_static/example/collector_reading.py
@@ -0,0 +1,10 @@
+import datetime
+import stem.descriptor.collector
+
+yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1)
+
+# provide yesterday's exits
+
+for desc in stem.descriptor.collector.get_server_descriptors(start = yesterday):
+  if desc.exit_policy.is_exiting_allowed():
+    print('  %s (%s)' % (desc.nickname, desc.fingerprint))
diff --git a/docs/_static/example/past_descriptors.py b/docs/_static/example/past_descriptors.py
deleted file mode 100644
index 41004845..00000000
--- a/docs/_static/example/past_descriptors.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from stem.descriptor.reader import DescriptorReader
-
-with DescriptorReader(["/home/atagar/server-descriptors-2013-03.tar"]) as reader:
-  for desc in reader:
-    print("found relay %s (%s)" % (desc.nickname, desc.fingerprint))
diff --git a/docs/api.rst b/docs/api.rst
index 2e2f9fae..a8ba7e24 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -43,6 +43,7 @@ remotely like Tor does.
 * `stem.directory <api/directory.html>`_ - Directory authority and fallback directory information.
 * `stem.descriptor.reader <api/descriptor/reader.html>`_ - Reads and parses descriptor files from disk.
 * `stem.descriptor.remote <api/descriptor/remote.html>`_ - Downloads descriptors from directory mirrors and authorities.
+* `stem.descriptor.collector <api/descriptor/collector.html>`_ - Downloads past descriptors from `CollecTor <https://metrics.torproject.org/collector.html>`_.
 * `stem.descriptor.export <api/descriptor/export.html>`_ - Exports descriptors to other formats.
 
 Utilities
diff --git a/docs/api/descriptor/collector.rst b/docs/api/descriptor/collector.rst
new file mode 100644
index 00000000..e699d0e7
--- /dev/null
+++ b/docs/api/descriptor/collector.rst
@@ -0,0 +1,5 @@
+CollecTor
+=========
+
+.. automodule:: stem.descriptor.collector
+
diff --git a/docs/change_log.rst b/docs/change_log.rst
index fec98f5b..a2337e60 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -56,6 +56,7 @@ The following are only available within Stem's `git repository
 
  * **Descriptors**
 
+  * Added the `stem.descriptor.collector <api/descriptor/collector.html>`_ module.
   * `Bandwidth file support <api/descriptor/bandwidth_file.html>`_ (:trac:`29056`)
   * Ed25519 validity checks are now done through the cryptography module rather than PyNaCl (:trac:`22022`)
   * Download compressed descriptors by default (:trac:`29186`)
@@ -134,7 +135,7 @@ and the `stem.directory module <api/directory.html>`_.
   * Added the *orport_v6* attribute to the :class:`~stem.directory.Authority` class
   * Added server descriptor's new is_hidden_service_dir attribute
   * Added the network status vote's new bandwidth_file_headers attribute (:spec:`84591df`)
-  * Added the microdescriptor router status entry's new or_addresses attribute (:trac:`26405`, :spec:`fdc8f3e8`)
+  * Added the microdescriptor router status entry's new or_addresses attribute (:trac:`26405`, :spec:`fdc8f3e`)
   * Don't retry downloading descriptors when we've timed out
   * Don't download from tor26, an authority that frequently times out
   * Replaced Bifroest bridge authority with Serge (:trac:`26771`)
diff --git a/docs/contents.rst b/docs/contents.rst
index fb4d6b24..267979e0 100644
--- a/docs/contents.rst
+++ b/docs/contents.rst
@@ -43,6 +43,7 @@ Contents
 
    api/descriptor/bandwidth_file
    api/descriptor/certificate
+   api/descriptor/collector
    api/descriptor/descriptor
    api/descriptor/server_descriptor
    api/descriptor/extrainfo_descriptor
diff --git a/docs/tutorials/mirror_mirror_on_the_wall.rst b/docs/tutorials/mirror_mirror_on_the_wall.rst
index eed4c65c..04cc86de 100644
--- a/docs/tutorials/mirror_mirror_on_the_wall.rst
+++ b/docs/tutorials/mirror_mirror_on_the_wall.rst
@@ -117,10 +117,17 @@ Where can I get past descriptors?
 ---------------------------------
 
 Descriptor archives are available from `CollecTor
-<https://collector.torproject.org/>`_. These archives can be read with
-the `DescriptorReader <../api/descriptor/reader.html>`_...
+<https://metrics.torproject.org/collector.html>`_. If you need Tor's topology
+at a prior point in time this is the place to go!
 
-.. literalinclude:: /_static/example/past_descriptors.py
+With CollecTor you can either read descriptors directly...
+
+.. literalinclude:: /_static/example/collector_reading.py
+   :language: python
+
+... or download the descriptors to disk and read them later.
+
+.. literalinclude:: /_static/example/collector_caching.py
    :language: python
 
 .. _can-i-get-descriptors-from-the-tor-process:
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index 20eb6872..28fcbd49 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -2,50 +2,19 @@
 # See LICENSE for licensing information
 
 """
-Module for downloading from Tor's descriptor archive, CollecTor...
+Descriptor archives are available from `CollecTor
+<https://metrics.torproject.org/collector.html>`_. If you need Tor's topology
+at a prior point in time this is the place to go!
 
-  https://collector.torproject.org/
+With CollecTor you can either read descriptors directly...
 
-This stores descriptors going back in time. If you need to know what the
-network topology looked like at a past point in time, this is the place to go.
-
-With this you can either download and read directly from CollecTor...
-
-::
-
-  import datetime
-  import stem.descriptor.collector
-
-  yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1)
-
-  # provide yesterday's exits
-
-  for desc in stem.descriptor.collector.get_server_descriptors(start = yesterday):
-    if desc.exit_policy.is_exiting_allowed():
-      print('  %s (%s)' % (desc.nickname, desc.fingerprint))
+.. literalinclude:: /_static/example/collector_reading.py
+   :language: python
 
 ... or download the descriptors to disk and read them later.
 
-::
-
-  import datetime
-  import stem.descriptor
-  import stem.descriptor.collector
-
-  yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1)
-  cache_dir = '~/descriptor_cache/server_desc_today'
-
-  collector = stem.descriptor.collector.CollecTor()
-
-  for f in collector.files('server-descriptor', start = yesterday):
-    f.download(cache_dir)
-
-  # then later...
-
-  for f in collector.files('server-descriptor', start = yesterday):
-    for desc in f.read(cache_dir):
-      if desc.exit_policy.is_exiting_allowed():
-        print('  %s (%s)' % (desc.nickname, desc.fingerprint))
+.. literalinclude:: /_static/example/collector_caching.py
+   :language: python
 
 ::
 
@@ -282,7 +251,7 @@ class File(object):
     :param str descriptor_type: `descriptor type
       <https://metrics.torproject.org/collector.html#data-formats>`_, this is
       guessed if not provided
-    :var stem.descriptor.__init__.DocumentHandler document_handler: method in
+    :param stem.descriptor.__init__.DocumentHandler document_handler: method in
       which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
     :param int timeout: timeout when connection becomes idle, no timeout
       applied if **None**
@@ -553,7 +522,7 @@ class CollecTor(object):
     :param datetime.datetime end: time range to end with
     :param str cache_to: directory to cache archives into, if an archive is
       available here it is not downloaded
-    :var stem.descriptor.__init__.DocumentHandler document_handler: method in
+    :param stem.descriptor.__init__.DocumentHandler document_handler: method in
       which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
     :param int version: consensus variant to retrieve (versions 2 or 3)
     :param bool microdescriptor: provides the microdescriptor consensus if
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 2d9133f5..85e35f57 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -869,21 +869,26 @@ class RelayDescriptor(ServerDescriptor):
         self.certificate.validate(self)
 
   @classmethod
-  def content(cls, attr = None, exclude = (), sign = False, signing_key = None):
+  def content(cls, attr = None, exclude = (), sign = False, signing_key = None, exit_policy = None):
     if signing_key:
       sign = True
 
     if attr is None:
       attr = {}
 
-    base_header = (
+    if exit_policy is None:
+      exit_policy = REJECT_ALL_POLICY
+
+    base_header = [
       ('router', '%s %s 9001 0 0' % (_random_nickname(), _random_ipv4_address())),
       ('published', _random_date()),
       ('bandwidth', '153600 256000 104590'),
-      ('reject', '*:*'),
+    ] + [
+      tuple(line.split(' ', 1)) for line in str(exit_policy).splitlines()
+    ] + [
       ('onion-key', _random_crypto_blob('RSA PUBLIC KEY')),
       ('signing-key', _random_crypto_blob('RSA PUBLIC KEY')),
-    )
+    ]
 
     if sign:
       if attr and 'signing-key' in attr:
@@ -909,8 +914,8 @@ class RelayDescriptor(ServerDescriptor):
       ))
 
   @classmethod
-  def create(cls, attr = None, exclude = (), validate = True, sign = False, signing_key = None):
-    return cls(cls.content(attr, exclude, sign, signing_key), validate = validate, skip_crypto_validation = not sign)
+  def create(cls, attr = None, exclude = (), validate = True, sign = False, signing_key = None, exit_policy = None):
+    return cls(cls.content(attr, exclude, sign, signing_key, exit_policy), validate = validate, skip_crypto_validation = not sign)
 
   @lru_cache()
   def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX):
diff --git a/test/unit/tutorial.py b/test/unit/tutorial.py
index c74b1912..9d70a5cf 100644
--- a/test/unit/tutorial.py
+++ b/test/unit/tutorial.py
@@ -8,10 +8,10 @@ import unittest
 import stem.descriptor.remote
 
 from stem.control import Controller
-from stem.descriptor.reader import DescriptorReader
 from stem.descriptor.router_status_entry import RouterStatusEntryV2, RouterStatusEntryV3
 from stem.descriptor.networkstatus import NetworkStatusDocumentV3
 from stem.descriptor.server_descriptor import RelayDescriptor
+from stem.exit_policy import ExitPolicy
 from test.unit import exec_documentation_example
 
 try:
@@ -165,13 +165,15 @@ class TestTutorial(unittest.TestCase):
     self.assertEqual('found relay caerSidi (A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB)\n', stdout_mock.getvalue())
 
   @patch('sys.stdout', new_callable = StringIO)
-  @patch('stem.descriptor.reader.DescriptorReader', spec = DescriptorReader)
-  def test_mirror_mirror_on_the_wall_4(self, reader_mock, stdout_mock):
-    reader = reader_mock().__enter__()
-    reader.__iter__.return_value = iter([RelayDescriptor.create({'router': 'caerSidi 71.35.133.197 9001 0 0'})])
-
-    exec_documentation_example('past_descriptors.py')
-    self.assertEqual('found relay caerSidi (None)\n', stdout_mock.getvalue())
+  @patch('stem.descriptor.collector.get_server_descriptors')
+  def test_mirror_mirror_on_the_wall_4(self, get_desc_mock, stdout_mock):
+    get_desc_mock.return_value = iter([RelayDescriptor.create({
+      'router': 'caerSidi 71.35.133.197 9001 0 0',
+      'fingerprint': '2C3C 4662 5698 B6D6 7DF3 2BC1 918A D3EE 1F99 06B1',
+    }, exit_policy = ExitPolicy('accept *:*'), validate = False)])
+
+    exec_documentation_example('collector_reading.py')
+    self.assertEqual('  caerSidi (2C3C46625698B6D67DF32BC1918AD3EE1F9906B1)\n', stdout_mock.getvalue())
 
   @patch('sys.stdout', new_callable = StringIO)
   @patch('stem.descriptor.remote.DescriptorDownloader')





More information about the tor-commits mailing list