[stem/master] Fallback directory information

commit 332ef6e6fe89d9f0d48e9b95acff7de9f3ec647f Author: Damian Johnson <atagar@torproject.org> Date: Sun Feb 21 18:04:40 2016 -0800 Fallback directory information Recently tor added fallback directory authorities to help clients bootstrap when the dirauths are unavailable... https://trac.torproject.org/projects/tor/ticket/17158 To help support this effort teor asked for a DocTor check to notify us when the directories become unavailable... https://trac.torproject.org/projects/tor/ticket/18177 Great idea but I'm expanding this. Stem now has a FallbackDirectory class with two methods for getting this information... * FallbackDirectory.from_remote() - Reads the latest fallback_dirs.inc from gitweb, providing the latest fallback directories in tor's master branch. * FallbackDirectory.from_cache() - Provides the latest fallback directories Stem has cached. This is only as up-to-date as the Stem release you're using but is quicker and avoids relying on gitweb. Advantages are... * Stem's descriptor.remote module now puts less load on the directory authorities since it uses fallback directories as well. * Running Stem's integ tests with the ONLINE target includes a test that exercises all the fallback directories, notifying us if any are down. --- cache_manual.py | 4 +- docs/change_log.rst | 1 + run_tests.py | 1 + setup.py | 2 +- stem/descriptor/remote.py | 266 ++++++++++++++++++++++++++++++++++++---- test/integ/descriptor/remote.py | 38 ++++++ test/unit/descriptor/remote.py | 56 +++++++++ 7 files changed, 341 insertions(+), 27 deletions(-) diff --git a/cache_manual.py b/cache_manual.py index aa55dc5..19e73f0 100755 --- a/cache_manual.py +++ b/cache_manual.py @@ -6,7 +6,6 @@ Caches tor's latest manual content. Run this to pick new man page changes. 
""" -import os import re import sys @@ -19,7 +18,6 @@ try: except ImportError: import urllib2 as urllib -CACHE_PATH = os.path.join(os.path.dirname(__file__), 'stem', 'cached_tor_manual.cfg') GITWEB_MAN_LOG = 'https://gitweb.torproject.org/tor.git/log/doc/tor.1.txt' MAN_LOG_LINK = "href='/tor.git/commit/doc/tor.1.txt\?id=([^']*)'" @@ -53,4 +51,4 @@ if __name__ == '__main__': latest_manual.man_commit = man_commit latest_manual.stem_commit = stem_commit - latest_manual.save(CACHE_PATH) + latest_manual.save(stem.manual.CACHE_PATH) diff --git a/docs/change_log.rst b/docs/change_log.rst index 81bf5a2..c8dc434 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -58,6 +58,7 @@ The following are only available within Stem's `git repository * **Descriptors** + * Fallback directory information, lessing the load of `stem.descriptor.remote <api/descriptor/remote.html>`_ on the directory authorities * Support for ed25519 descriptor fields (:spec:`5a79d67`) * Server descriptor validation fails with 'extra-info-digest line had an invalid value' from additions in proposal 228 (:trac:`16227`) * :class:`~stem.descriptor.server_descriptor.BridgeDescriptor` now has 'ntor_onion_key' like its unsanitized counterparts diff --git a/run_tests.py b/run_tests.py index 3d0f845..2fb93bf 100755 --- a/run_tests.py +++ b/run_tests.py @@ -42,6 +42,7 @@ SRC_PATHS = [os.path.join(STEM_BASE, path) for path in ( 'test', 'run_tests.py', 'cache_manual.py', + 'cache_fallback_directories.py', 'setup.py', 'tor-prompt', os.path.join('docs', 'republish.py'), diff --git a/setup.py b/setup.py index 2a78b9b..2e492d4 100644 --- a/setup.py +++ b/setup.py @@ -17,5 +17,5 @@ distutils.core.setup( packages = ['stem', 'stem.descriptor', 'stem.interpreter', 'stem.response', 'stem.util'], keywords = 'tor onion controller', scripts = ['tor-prompt'], - package_data = {'stem': ['cached_tor_manual.cfg', 'settings.cfg'], 'stem.interpreter': ['settings.cfg'], 'stem.util': ['ports.cfg']}, + package_data = {'stem': 
['cached_tor_manual.cfg', 'settings.cfg'], 'stem.descriptor': ['fallback_directories.cfg'], 'stem.interpreter': ['settings.cfg'], 'stem.util': ['ports.cfg']}, ) diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py index d199fb2..dc40e41 100644 --- a/stem/descriptor/remote.py +++ b/stem/descriptor/remote.py @@ -44,7 +44,11 @@ itself... get_authorities - Provides tor directory information. - DirectoryAuthority - Information about a tor directory authority. + Directory - Relay we can retrieve directory information from + |- DirectoryAuthority - Information about a tor directory authority + +- FallbackDirectory - Directory mirror tor uses when authorities are unavailable + |- from_cache - Provides fallback directories cached with Stem. + +- from_remote - Retrieves fallback directories remotely from tor's latest commit. Query - Asynchronous request to download tor descriptors |- start - issues the query if it isn't already running @@ -73,7 +77,9 @@ itself... """ import io +import os import random +import re import sys import threading import time @@ -88,7 +94,7 @@ except ImportError: import stem.descriptor from stem import Flag -from stem.util import log +from stem.util import connection, log, tor_tools # Tor has a limited number of descriptors we can fetch explicitly by their # fingerprint or hashes due to a limit on the url length by squid proxies. @@ -96,6 +102,9 @@ from stem.util import log MAX_FINGERPRINTS = 96 MAX_MICRODESCRIPTOR_HASHES = 92 +GITWEB_FALLBACK_DIR_URL = 'https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc' +CACHE_PATH = os.path.join(os.path.dirname(__file__), 'fallback_directories.cfg') + def _guess_descriptor_type(resource): # Attempts to determine the descriptor type based on the resource url. 
This @@ -340,8 +349,10 @@ class Query(object): """ if use_authority or not self.endpoints: - authority = random.choice(list(filter(lambda auth: auth.v3ident is not None, get_authorities().values()))) - address, dirport = authority.address, authority.dir_port + directories = get_authorities().values() + FallbackDirectory.from_cache().values() + + picked = random.choice(directories) + address, dirport = picked.address, picked.dir_port else: address, dirport = random.choice(self.endpoints) @@ -390,8 +401,8 @@ class DescriptorDownloader(object): def __init__(self, use_mirrors = False, **default_args): self._default_args = default_args - authorities = filter(lambda auth: auth.v3ident is not None, get_authorities().values()) - self._endpoints = [(auth.address, auth.dir_port) for auth in authorities] + directories = get_authorities().values() + FallbackDirectory.from_cache().values() + self._endpoints = [(directory.address, directory.dir_port) for directory in directories] if use_mirrors: try: @@ -412,8 +423,8 @@ class DescriptorDownloader(object): :raises: **Exception** if unable to determine the directory mirrors """ - authorities = filter(lambda auth: auth.v3ident is not None, get_authorities().values()) - new_endpoints = set([(auth.address, auth.dir_port) for auth in authorities]) + directories = get_authorities().values() + FallbackDirectory.from_cache().values() + new_endpoints = set([(directory.address, directory.dir_port) for directory in directories]) consensus = list(self.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run())[0] @@ -617,7 +628,39 @@ class DescriptorDownloader(object): ) -class DirectoryAuthority(object): +class Directory(object): + """ + Relay we can contact for directory information + + .. 
versionadded:: 1.5.0 + + :var str address: IP address of the authority, currently they're all IPv4 but + this may not always be the case + :var int or_port: port on which the relay services relay traffic + :var int dir_port: port on which directory information is available + :var str fingerprint: relay fingerprint + :var str nickname: nickname of the authority + """ + + def __init__(self, address, or_port, dir_port, fingerprint, nickname): + self.address = address + self.or_port = or_port + self.dir_port = dir_port + self.fingerprint = fingerprint + self.nickname = nickname + + def __eq__(self, other): + if not isinstance(other, Directory): + return False + + for attr in ('nickname', 'address', 'or_port', 'dir_port', 'fingerprint'): + if getattr(self, attr) != getattr(other, attr): + return False + + return True + + +class DirectoryAuthority(Directory): """ Tor directory authority, a special type of relay `hardcoded into tor <https://gitweb.torproject.org/tor.git/tree/src/or/config.c#n819>`_ @@ -648,23 +691,21 @@ class DirectoryAuthority(object): .. versionchanged:: 1.3.0 Added the is_bandwidth_authority attribute. 
- :var str nickname: nickname of the authority - :var str address: IP address of the authority, currently they're all IPv4 but - this may not always be the case - :var int or_port: port on which the relay services relay traffic - :var int dir_port: port on which directory information is available - :var str fingerprint: relay fingerprint :var str v3ident: identity key fingerprint used to sign votes and consensus + :var bool is_bandwidth_authority: **True** if this is a bandwidth authority, + **False** otherwise """ - def __init__(self, nickname = None, address = None, or_port = None, dir_port = None, is_bandwidth_authority = False, fingerprint = None, v3ident = None): - self.nickname = nickname - self.address = address - self.or_port = or_port - self.dir_port = dir_port - self.is_bandwidth_authority = is_bandwidth_authority - self.fingerprint = fingerprint + def __init__(self, address = None, or_port = None, dir_port = None, fingerprint = None, nickname = None, v3ident = None, is_bandwidth_authority = False): + super(DirectoryAuthority, self).__init__(address, or_port, dir_port, fingerprint, nickname) self.v3ident = v3ident + self.is_bandwidth_authority = is_bandwidth_authority + + def __eq__(self, other): + if isinstance(other, DirectoryAuthority) and super(DirectoryAuthority, self).__eq__(other): + return self.v3ident == other.v3ident and self.is_bandwidth_authority == other.is_bandwidth_authority + else: + return False DIRECTORY_AUTHORITIES = { @@ -767,7 +808,186 @@ def get_authorities(): The directory information hardcoded into Tor and occasionally changes, so the information this provides might not necessarily match your version of tor. 
- :returns: dict of str nicknames to :class:`~stem.descriptor.remote.DirectoryAuthority` instances + :returns: **dict** of **str** nicknames to :class:`~stem.descriptor.remote.DirectoryAuthority` instances """ return dict(DIRECTORY_AUTHORITIES) + + +class FallbackDirectory(Directory): + """ + Tor directories tor uses as alternates for the authorities. These relays are + `hardcoded in tor <https://gitweb.torproject.org/tor.git/tree/src/or/fallback_dirs.inc>`_. + + .. versionadded:: 1.5.0 + """ + + def __init__(self, address = None, or_port = None, dir_port = None, fingerprint = None, nickname = None): + super(FallbackDirectory, self).__init__(address, or_port, dir_port, fingerprint, nickname) + + @staticmethod + def from_cache(): + """ + Provides fallback directory information cached with Stem. Unlike + :func:`~stem.descriptor.remote.FallbackDirectory.from_remote` this doesn't + have any system requirements, and is faster too. Only drawback is that + these fallback directories are only as up to date as the Stem release we're + using. 
+ + :returns: **dict** of **str** fingerprints to their + :class:`~stem.descriptor.remote.FallbackDirectory` + """ + + conf = stem.util.conf.Config() + conf.load(CACHE_PATH) + + results = {} + + for nickname in set([key.split('.')[0] for key in conf.keys()]): + if nickname in ('tor_commit', 'stem_commit'): + continue + + attr = {} + + for attr_name in ('address', 'or_port', 'dir_port', 'fingerprint'): + key = '%s.%s' % (nickname, attr_name) + attr[attr_name] = conf.get(key) + + if not attr[attr_name]: + raise IOError("'%s' is missing from %s" % (key, CACHE_PATH)) + + if not connection.is_valid_ipv4_address(attr['address']): + raise IOError("'%s.address' was an invalid address (%s)" % (nickname, attr['address'])) + elif not connection.is_valid_port(attr['or_port']): + raise IOError("'%s.or_port' was an invalid port (%s)" % (nickname, attr['or_port'])) + elif not connection.is_valid_port(attr['dir_port']): + raise IOError("'%s.dir_port' was an invalid port (%s)" % (nickname, attr['dir_port'])) + elif not tor_tools.is_valid_fingerprint(attr['fingerprint']): + raise IOError("'%s.fingerprint' was an invalid fingerprint (%s)" % (nickname, attr['fingerprint'])) + + results[attr['fingerprint']] = FallbackDirectory( + address = attr['address'], + or_port = int(attr['or_port']), + dir_port = int(attr['dir_port']), + fingerprint = attr['fingerprint'], + nickname = nickname, + ) + + return results + + @staticmethod + def from_remote(timeout = 60): + """ + Reads and parses tor's latest fallback directories `from + gitweb.torproject.org + <https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc>`_. + Note that while convenient, this reliance on GitWeb means you should always + call with a fallback, such as... 
+ + :: + + try: + fallback_directories = stem.descriptor.remote.from_remote() + except IOError: + fallback_directories = stem.descriptor.remote.from_cache() + + :param int timeout: seconds to wait before timing out the request + + :returns: **dict** of **str** fingerprints to their + :class:`~stem.descriptor.remote.FallbackDirectory` + + :raises: **IOError** if unable to retrieve the fallback directories + """ + + try: + fallback_dir_page = urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout = timeout).read() + except: + exc = sys.exc_info()[1] + raise IOError("Unable to download tor's fallback directories from %s: %s" % (GITWEB_FALLBACK_DIR_URL, exc)) + + # Example of an entry... + # + # /* + # wagner + # Flags: Fast Guard Running Stable V2Dir Valid + # Fallback Weight: 43680 / 491920 (8.879%) + # Consensus Weight: 62600 / 546000 (11.465%) + # Rarely used email <trff914 AT gmail DOT com> + # */ + # "5.175.233.86:80 orport=443 id=5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33" + # " weight=43680", + + results, nickname, last_line = {}, None, None + + for line in fallback_dir_page.splitlines(): + if last_line == '/*': + nickname = line + elif line.startswith('"'): + addr_line_match = re.match('"([\d\.]+):(\d+) orport=(\d+) id=([\dA-F]{40}).*', line) + + if addr_line_match: + address, dir_port, or_port, fingerprint = addr_line_match.groups() + + if not connection.is_valid_ipv4_address(address): + raise IOError('%s has an invalid address: %s' % (nickname, address)) + elif not connection.is_valid_port(or_port): + raise IOError('%s has an invalid or_port: %s' % (nickname, or_port)) + elif not connection.is_valid_port(dir_port): + raise IOError('%s has an invalid dir_port: %s' % (nickname, dir_port)) + elif not tor_tools.is_valid_fingerprint(fingerprint): + raise IOError('%s has an invalid fingerprint: %s' % (nickname, fingerprint)) + + results[fingerprint] = FallbackDirectory( + address = address, + or_port = int(or_port), + dir_port = int(dir_port), + fingerprint = fingerprint, 
+ nickname = nickname, + ) + + last_line = line + + return results + + +def _fallback_directory_differences(previous_directories, new_directories): + """ + Provides a description of how fallback directories differ. + """ + + lines = [] + + added_fp = set(new_directories.keys()).difference(previous_directories.keys()) + removed_fp = set(previous_directories.keys()).difference(new_directories.keys()) + + for fp in added_fp: + directory = new_directories[fp] + + lines += [ + '* Added %s as a new fallback directory:' % directory.nickname, + ' address: %s' % directory.address, + ' or_port: %s' % directory.or_port, + ' dir_port: %s' % directory.dir_port, + ' fingerprint: %s' % directory.fingerprint, + '', + ] + + for fp in removed_fp: + lines.append('* Removed %s as a fallback directory' % previous_directories[fp].nickname) + + for fp in new_directories: + if fp in added_fp or fp in removed_fp: + continue # already discussed these + + previous_directory = previous_directories[fp] + new_directory = new_directories[fp] + + if previous_directory != new_directory: + for attr in ('nickname', 'address', 'or_port', 'dir_port', 'fingerprint'): + old_attr = getattr(previous_directory, attr) + new_attr = getattr(new_directory, attr) + + if old_attr != new_attr: + lines.append('* Changed the %s of %s from %s to %s' % (attr, fp, old_attr, new_attr)) + + return '\n'.join(lines) diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py index fd37b57..9ce9b8f 100644 --- a/test/integ/descriptor/remote.py +++ b/test/integ/descriptor/remote.py @@ -218,3 +218,41 @@ class TestDescriptorDownloader(unittest.TestCase): self.assertTrue(isinstance(single_query_results[0], stem.descriptor.networkstatus.KeyCertificate)) self.assertEqual(2, len(list(multiple_query))) + + @require_online + def test_that_cache_is_up_to_date(self): + """ + Check if the cached fallback directories bundled with Stem are up to date + or not. 
+ """ + + cached_fallback_directories = stem.descriptor.remote.FallbackDirectory.from_cache() + latest_fallback_directories = stem.descriptor.remote.FallbackDirectory.from_remote() + + if cached_fallback_directories != latest_fallback_directories: + self.fail("Stem's cached fallback directories are out of date. Please run 'cache_fallback_directories.py'...\n\n%s" % stem.descriptor.remote._fallback_directory_differences(cached_fallback_directories, latest_fallback_directories)) + + @require_online + def test_that_fallback_directories_are_reachable(self): + """ + Fetch information from each fallback directory to confirm that it's + available. + """ + + unsuccessful = {} + downloader = stem.descriptor.remote.DescriptorDownloader() + moria1_v3ident = stem.descriptor.remote.get_authorities()['moria1'].v3ident + + for fallback_directory in stem.descriptor.remote.FallbackDirectory.from_cache().values(): + try: + downloader.get_key_certificates(authority_v3idents = moria1_v3ident, endpoints = [(fallback_directory.address, fallback_directory.dir_port)]).run() + except Exception as exc: + unsuccessful[fallback_directory] = exc + + if unsuccessful: + lines = ['We were unable to contact the following fallback directories...\n'] + + for fallback_directory, exc in unsuccessful.items(): + lines.append('* %s:%s (%s): %s' % (fallback_directory.address, fallback_directory.dir_port, fallback_directory.fingerprint, exc)) + + self.fail('\n'.join(lines)) diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py index 19907a7..f0e0dea 100644 --- a/test/unit/descriptor/remote.py +++ b/test/unit/descriptor/remote.py @@ -58,6 +58,32 @@ iO3EUE0AEYah2W9gdz8t+i3Dtr0zgqLS841GC/TyDKCm+MKmN8d098qnwK0NGF9q -----END SIGNATURE----- """ +FALLBACK_DIR_CONTENT = b"""\ +/* Trial fallbacks for 0.2.8.1-alpha with ADDRESS_AND_PORT_STABLE_DAYS = 30 + * This works around an issue where relays post a descriptor without a DirPort + * when restarted. 
If these relays stay up, they will have been up for 120 days + * by the 0.2.8 stable release -- teor */ +/* +wagner +Flags: Fast Guard Running Stable V2Dir Valid +Fallback Weight: 43680 / 491920 (8.879%) +Consensus Weight: 62600 / 546000 (11.465%) +Rarely used email <trff914 AT gmail DOT com> +*/ +"5.175.233.86:80 orport=443 id=5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33" +" weight=43680", +/* +kitten2 +Flags: Fast Guard HSDir Running Stable V2Dir Valid +Fallback Weight: 43680 / 491920 (8.879%) +Consensus Weight: 59100 / 546000 (10.824%) +0xEFB74277ECE4E222 Aeris <aeris+tor AT imirhil DOT fr> - 1aerisnnLWPchhDSXpxWGYWwLiSFUVFnd +*/ +"62.210.124.124:9130 orport=9101 id=2EBD117806EE43C3CC885A8F1E4DC60F207E7D3E" +" ipv6=[2001:bc8:3f23:100::1]:9101" +" weight=43680", +""" + class TestDescriptorDownloader(unittest.TestCase): @patch(URL_OPEN) @@ -154,3 +180,33 @@ class TestDescriptorDownloader(unittest.TestCase): self.assertEqual(1, len(list(query))) self.assertEqual(1, len(list(query))) self.assertEqual(1, len(list(query))) + + def test_fallback_directories_from_cache(self): + # quick sanity test that we can load cached content + fallback_directories = stem.descriptor.remote.FallbackDirectory.from_cache() + self.assertTrue(len(fallback_directories) > 10) + self.assertEqual('wagner', fallback_directories['5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33'].nickname) + + @patch(URL_OPEN) + def test_fallback_directories_from_remote(self, urlopen_mock): + urlopen_mock.return_value = io.BytesIO(FALLBACK_DIR_CONTENT) + fallback_directories = stem.descriptor.remote.FallbackDirectory.from_remote() + + expected = { + '5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33': stem.descriptor.remote.FallbackDirectory( + nickname = 'wagner', + address = '5.175.233.86', + or_port = 443, + dir_port = 80, + fingerprint = '5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33', + ), + '2EBD117806EE43C3CC885A8F1E4DC60F207E7D3E': stem.descriptor.remote.FallbackDirectory( + nickname = 'kitten2', + address = '62.210.124.124', + 
or_port = 9101, + dir_port = 9130, + fingerprint = '2EBD117806EE43C3CC885A8F1E4DC60F207E7D3E', + ), + } + + self.assertEqual(expected, fallback_directories)
participants (1)
-
atagar@torproject.org