[stem/master] Add CollecTor to our descriptor tutorial

commit 6a44d211342a727b71824825262123aeaf300c99 Author: Damian Johnson <atagar@torproject.org> Date: Sat Aug 17 13:22:53 2019 -0700 Add CollecTor to our descriptor tutorial Replacing our deprecated stem.descriptor.reader example with usage of our new collector module. --- docs/_static/example/collector_caching.py | 18 ++++++++++ docs/_static/example/collector_reading.py | 10 ++++++ docs/_static/example/past_descriptors.py | 5 --- docs/api.rst | 1 + docs/api/descriptor/collector.rst | 5 +++ docs/change_log.rst | 3 +- docs/contents.rst | 1 + docs/tutorials/mirror_mirror_on_the_wall.rst | 13 +++++-- stem/descriptor/collector.py | 51 ++++++---------------------- stem/descriptor/server_descriptor.py | 17 ++++++---- test/unit/tutorial.py | 18 +++++----- 11 files changed, 78 insertions(+), 64 deletions(-) diff --git a/docs/_static/example/collector_caching.py b/docs/_static/example/collector_caching.py new file mode 100644 index 00000000..bff63c47 --- /dev/null +++ b/docs/_static/example/collector_caching.py @@ -0,0 +1,18 @@ +import datetime +import stem.descriptor +import stem.descriptor.collector + +yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1) +cache_dir = '~/descriptor_cache/server_desc_today' + +collector = stem.descriptor.collector.CollecTor() + +for f in collector.files('server-descriptor', start = yesterday): + f.download(cache_dir) + +# then later... + +for f in collector.files('server-descriptor', start = yesterday): + for desc in f.read(cache_dir): + if desc.exit_policy.is_exiting_allowed(): + print(' %s (%s)' % (desc.nickname, desc.fingerprint)) diff --git a/docs/_static/example/collector_reading.py b/docs/_static/example/collector_reading.py new file mode 100644 index 00000000..06cc913a --- /dev/null +++ b/docs/_static/example/collector_reading.py @@ -0,0 +1,10 @@ +import datetime +import stem.descriptor.collector + +yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1) + +# provide yesterday's exits + +for desc in stem.descriptor.collector.get_server_descriptors(start = yesterday): + if desc.exit_policy.is_exiting_allowed(): + print(' %s (%s)' % (desc.nickname, desc.fingerprint)) diff --git a/docs/_static/example/past_descriptors.py b/docs/_static/example/past_descriptors.py deleted file mode 100644 index 41004845..00000000 --- a/docs/_static/example/past_descriptors.py +++ /dev/null @@ -1,5 +0,0 @@ -from stem.descriptor.reader import DescriptorReader - -with DescriptorReader(["/home/atagar/server-descriptors-2013-03.tar"]) as reader: - for desc in reader: - print("found relay %s (%s)" % (desc.nickname, desc.fingerprint)) diff --git a/docs/api.rst b/docs/api.rst index 2e2f9fae..a8ba7e24 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -43,6 +43,7 @@ remotely like Tor does. * `stem.directory <api/directory.html>`_ - Directory authority and fallback directory information. * `stem.descriptor.reader <api/descriptor/reader.html>`_ - Reads and parses descriptor files from disk. * `stem.descriptor.remote <api/descriptor/remote.html>`_ - Downloads descriptors from directory mirrors and authorities. +* `stem.descriptor.collector <api/descriptor/collector.html>`_ - Downloads past descriptors from `CollecTor <https://metrics.torproject.org/collector.html>`_. * `stem.descriptor.export <api/descriptor/export.html>`_ - Exports descriptors to other formats. Utilities diff --git a/docs/api/descriptor/collector.rst b/docs/api/descriptor/collector.rst new file mode 100644 index 00000000..e699d0e7 --- /dev/null +++ b/docs/api/descriptor/collector.rst @@ -0,0 +1,5 @@ +CollecTor +========= + +.. automodule:: stem.descriptor.collector + diff --git a/docs/change_log.rst b/docs/change_log.rst index fec98f5b..a2337e60 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -56,6 +56,7 @@ The following are only available within Stem's `git repository * **Descriptors** + * Added the `stem.descriptor.collector <api/descriptor/collector.html>`_ module. * `Bandwidth file support <api/descriptor/bandwidth_file.html>`_ (:trac:`29056`) * Ed25519 validity checks are now done though the cryptography module rather than PyNaCl (:trac:`22022`) * Download compressed descriptors by default (:trac:`29186`) @@ -134,7 +135,7 @@ and the `stem.directory module <api/directory.html>`_. * Added the *orport_v6* attribute to the :class:`~stem.directory.Authority` class * Added server descriptor's new is_hidden_service_dir attribute * Added the network status vote's new bandwidth_file_headers attribute (:spec:`84591df`) - * Added the microdescriptor router status entry's new or_addresses attribute (:trac:`26405`, :spec:`fdc8f3e8`) + * Added the microdescriptor router status entry's new or_addresses attribute (:trac:`26405`, :spec:`fdc8f3e`) * Don't retry downloading descriptors when we've timed out * Don't download from tor26, an authority that frequently timeout * Replaced Bifroest bridge authority with Serge (:trac:`26771`) diff --git a/docs/contents.rst b/docs/contents.rst index fb4d6b24..267979e0 100644 --- a/docs/contents.rst +++ b/docs/contents.rst @@ -43,6 +43,7 @@ Contents api/descriptor/bandwidth_file api/descriptor/certificate + api/descriptor/collector api/descriptor/descriptor api/descriptor/server_descriptor api/descriptor/extrainfo_descriptor diff --git a/docs/tutorials/mirror_mirror_on_the_wall.rst b/docs/tutorials/mirror_mirror_on_the_wall.rst index eed4c65c..04cc86de 100644 --- a/docs/tutorials/mirror_mirror_on_the_wall.rst +++ b/docs/tutorials/mirror_mirror_on_the_wall.rst @@ -117,10 +117,17 @@ Where can I get past descriptors? --------------------------------- Descriptor archives are available from `CollecTor -<https://collector.torproject.org/>`_. These archives can be read with -the `DescriptorReader <../api/descriptor/reader.html>`_... +<https://metrics.torproject.org/collector.html>`_. If you need Tor's topology +at a prior point in time this is the place to go! -.. literalinclude:: /_static/example/past_descriptors.py +With CollecTor you can either read descriptors directly... + +.. literalinclude:: /_static/example/collector_reading.py + :language: python + +... or download the descriptors to disk and read them later. + +.. literalinclude:: /_static/example/collector_caching.py :language: python .. _can-i-get-descriptors-from-the-tor-process: diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py index 20eb6872..28fcbd49 100644 --- a/stem/descriptor/collector.py +++ b/stem/descriptor/collector.py @@ -2,50 +2,19 @@ # See LICENSE for licensing information """ -Module for downloading from Tor's descriptor archive, CollecTor... +Descriptor archives are available from `CollecTor +<https://metrics.torproject.org/collector.html>`_. If you need Tor's topology +at a prior point in time this is the place to go! - https://collector.torproject.org/ +With CollecTor you can either read descriptors directly... -This stores descriptors going back in time. If you need to know what the -network topology looked like at a past point in time, this is the place to go. - -With this you can either download and read directly from CollecTor... - -:: - - import datetime - import stem.descriptor.collector - - yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1) - - # provide yesterday's exits - - for desc in stem.descriptor.collector.get_server_descriptors(start = yesterday): - if desc.exit_policy.is_exiting_allowed(): - print(' %s (%s)' % (desc.nickname, desc.fingerprint)) +.. literalinclude:: /_static/example/collector_reading.py + :language: python ... or download the descriptors to disk and read them later. -:: - - import datetime - import stem.descriptor - import stem.descriptor.collector - - yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1) - cache_dir = '~/descriptor_cache/server_desc_today' - - collector = stem.descriptor.collector.CollecTor() - - for f in collector.files('server-descriptor', start = yesterday): - f.download(cache_dir) - - # then later... - - for f in collector.files('server-descriptor', start = yesterday): - for desc in f.read(cache_dir): - if desc.exit_policy.is_exiting_allowed(): - print(' %s (%s)' % (desc.nickname, desc.fingerprint)) +.. literalinclude:: /_static/example/collector_caching.py + :language: python :: @@ -282,7 +251,7 @@ class File(object): :param str descriptor_type: `descriptor type <https://metrics.torproject.org/collector.html#data-formats>`_, this is guessed if not provided - :var stem.descriptor.__init__.DocumentHandler document_handler: method in + :param stem.descriptor.__init__.DocumentHandler document_handler: method in which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` :param int timeout: timeout when connection becomes idle, no timeout applied if **None** @@ -553,7 +522,7 @@ class CollecTor(object): :param datetime.datetime end: time range to end with :param str cache_to: directory to cache archives into, if an archive is available here it is not downloaded - :var stem.descriptor.__init__.DocumentHandler document_handler: method in + :param stem.descriptor.__init__.DocumentHandler document_handler: method in which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` :param int version: consensus variant to retrieve (versions 2 or 3) :param bool microdescriptor: provides the microdescriptor consensus if diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py index 2d9133f5..85e35f57 100644 --- a/stem/descriptor/server_descriptor.py +++ b/stem/descriptor/server_descriptor.py @@ -869,21 +869,26 @@ class RelayDescriptor(ServerDescriptor): self.certificate.validate(self) @classmethod - def content(cls, attr = None, exclude = (), sign = False, signing_key = None): + def content(cls, attr = None, exclude = (), sign = False, signing_key = None, exit_policy = None): if signing_key: sign = True if attr is None: attr = {} - base_header = ( + if exit_policy is None: + exit_policy = REJECT_ALL_POLICY + + base_header = [ ('router', '%s %s 9001 0 0' % (_random_nickname(), _random_ipv4_address())), ('published', _random_date()), ('bandwidth', '153600 256000 104590'), - ('reject', '*:*'), + ] + [ + tuple(line.split(' ', 1)) for line in str(exit_policy).splitlines() + ] + [ ('onion-key', _random_crypto_blob('RSA PUBLIC KEY')), ('signing-key', _random_crypto_blob('RSA PUBLIC KEY')), - ) + ] if sign: if attr and 'signing-key' in attr: @@ -909,8 +914,8 @@ class RelayDescriptor(ServerDescriptor): )) @classmethod - def create(cls, attr = None, exclude = (), validate = True, sign = False, signing_key = None): - return cls(cls.content(attr, exclude, sign, signing_key), validate = validate, skip_crypto_validation = not sign) + def create(cls, attr = None, exclude = (), validate = True, sign = False, signing_key = None, exit_policy = None): + return cls(cls.content(attr, exclude, sign, signing_key, exit_policy), validate = validate, skip_crypto_validation = not sign) @lru_cache() def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX): diff --git a/test/unit/tutorial.py b/test/unit/tutorial.py index c74b1912..9d70a5cf 100644 --- a/test/unit/tutorial.py +++ b/test/unit/tutorial.py @@ -8,10 +8,10 @@ import unittest import stem.descriptor.remote from stem.control import Controller -from stem.descriptor.reader import DescriptorReader from stem.descriptor.router_status_entry import RouterStatusEntryV2, RouterStatusEntryV3 from stem.descriptor.networkstatus import NetworkStatusDocumentV3 from stem.descriptor.server_descriptor import RelayDescriptor +from stem.exit_policy import ExitPolicy from test.unit import exec_documentation_example try: @@ -165,13 +165,15 @@ class TestTutorial(unittest.TestCase): self.assertEqual('found relay caerSidi (A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB)\n', stdout_mock.getvalue()) @patch('sys.stdout', new_callable = StringIO) - @patch('stem.descriptor.reader.DescriptorReader', spec = DescriptorReader) - def test_mirror_mirror_on_the_wall_4(self, reader_mock, stdout_mock): - reader = reader_mock().__enter__() - reader.__iter__.return_value = iter([RelayDescriptor.create({'router': 'caerSidi 71.35.133.197 9001 0 0'})]) - - exec_documentation_example('past_descriptors.py') - self.assertEqual('found relay caerSidi (None)\n', stdout_mock.getvalue()) + @patch('stem.descriptor.collector.get_server_descriptors') + def test_mirror_mirror_on_the_wall_4(self, get_desc_mock, stdout_mock): + get_desc_mock.return_value = iter([RelayDescriptor.create({ + 'router': 'caerSidi 71.35.133.197 9001 0 0', + 'fingerprint': '2C3C 4662 5698 B6D6 7DF3 2BC1 918A D3EE 1F99 06B1', + }, exit_policy = ExitPolicy('accept *:*'), validate = False)]) + + exec_documentation_example('collector_reading.py') + self.assertEqual(' caerSidi (2C3C46625698B6D67DF32BC1918AD3EE1F9906B1)\n', stdout_mock.getvalue()) @patch('sys.stdout', new_callable = StringIO) @patch('stem.descriptor.remote.DescriptorDownloader')
participants (1)
-
atagar@torproject.org