commit ae6583197cceefcf88b09e5a8c05920ca3ecfe50 Author: Damian Johnson <atagar@torproject.org> Date: Sun Jan 7 19:03:43 2018 -0800
Support new v2 fallback format
Iterating on the fallback format has changed it a bit to be easier to parse and include header metadata. Supporting this new format. --- docs/change_log.rst | 1 + stem/descriptor/remote.py | 228 +++++++++++++++++++++++++++-------------- test/unit/descriptor/remote.py | 35 +++++++ 3 files changed, 189 insertions(+), 75 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst index c1af4f58..2d2490b5 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -51,6 +51,7 @@ The following are only available within Stem's `git repository
* **Descriptors**
+ * `Fallback directory v2 support <https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html>`_, which adds *nickname* and *extrainfo* * Reduced maximum descriptors fetched by the remote module to match tor's new limit (:trac:`24743`)
* **Website** diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py index 13fa2893..1f002e27 100644 --- a/stem/descriptor/remote.py +++ b/stem/descriptor/remote.py @@ -12,7 +12,7 @@ information...
for desc in stem.descriptor.remote.get_server_descriptors(): if desc.exit_policy.is_exiting_allowed(): - print ' %s (%s)' % (desc.nickname, desc.fingerprint) + print(' %s (%s)' % (desc.nickname, desc.fingerprint))
More custom downloading behavior can be done through the :class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues @@ -30,17 +30,17 @@ content. For example...
query = downloader.get_server_descriptors()
- print 'Exit Relays:' + print('Exit Relays:')
try: for desc in query.run(): if desc.exit_policy.is_exiting_allowed(): - print ' %s (%s)' % (desc.nickname, desc.fingerprint) + print(' %s (%s)' % (desc.nickname, desc.fingerprint))
print - print 'Query took %0.2f seconds' % query.runtime + print('Query took %0.2f seconds' % query.runtime) except Exception as exc: - print 'Unable to retrieve the server descriptors: %s' % exc + print('Unable to retrieve the server descriptors: %s' % exc)
::
@@ -112,6 +112,14 @@ MAX_MICRODESCRIPTOR_HASHES = 90 GITWEB_FALLBACK_DIR_URL = 'https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc' CACHE_PATH = os.path.join(os.path.dirname(__file__), 'fallback_directories.cfg')
+FALLBACK_DIV = '/* ===== */' +FALLBACK_MAPPING = re.compile('/\*\s+(\S+)=(\S*)\s+\*/') + +FALLBACK_ADDR = re.compile('"([\d.]+):(\d+) orport=(\d+) id=([\dA-F]{40}).*') +FALLBACK_NICKNAME = re.compile('/\* nickname=(\S+) \*/') +FALLBACK_EXTRAINFO = re.compile('/\* extrainfo=([0-1]) \*/') +FALLBACK_IPV6 = re.compile('" ipv6=\[([\da-f:]+)\]:(\d+)"') + SINGLETON_DOWNLOADER = None
@@ -215,22 +223,22 @@ class Query(object): timeout = 30, )
- print 'Current relays:' + print('Current relays:')
if not query.error: for desc in query: - print desc.fingerprint + print(desc.fingerprint) else: - print 'Unable to retrieve the server descriptors: %s' % query.error + print('Unable to retrieve the server descriptors: %s' % query.error)
... while iterating fails silently...
::
- print 'Current relays:' + print('Current relays:')
for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'): - print desc.fingerprint + print(desc.fingerprint)
In either case exceptions are available via our 'error' attribute.
@@ -568,7 +576,7 @@ class DescriptorDownloader(object):
return self.query(resource, **query_args)
- # TODO: drop in python 2.x + # TODO: drop in stem 2.x
def get_microdescriptors(self, hashes, **query_args): """ @@ -940,10 +948,8 @@ class FallbackDirectory(Directory): .. versionadded:: 1.5.0
.. versionchanged:: 1.7.0 - Added the nickname and has_extrainfo attributes. - - .. versionchanged:: 1.7.0 - Support for parsing `second version of the fallback directories + Added the nickname and has_extrainfo attributes, which are part of the + `second version of the fallback directories <https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html>`_.
:var str nickname: relay nickname @@ -1044,80 +1050,152 @@ class FallbackDirectory(Directory): """
try: - fallback_dir_page = str_tools._to_unicode(urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout = timeout).read()) + lines = str_tools._to_unicode(urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout = timeout).read()).splitlines() except: exc = sys.exc_info()[1] raise IOError("Unable to download tor's fallback directories from %s: %s" % (GITWEB_FALLBACK_DIR_URL, exc))
- return FallbackDirectory._parse_v2(fallback_dir_page) + if not lines: + raise IOError('%s did not have any content' % GITWEB_FALLBACK_DIR_URL) + elif lines[0] != '/* type=fallback */': + raise IOError('%s does not have a type field indicating it is fallback directory metadata' % GITWEB_FALLBACK_DIR_URL) + + # header metadata + + header = {} + + for line in FallbackDirectory._pop_section(lines): + mapping = FALLBACK_MAPPING.match(line) + + if mapping: + header[mapping.group(1)] = mapping.group(2) + else: + raise IOError('Malformed fallback directory header line: %s' % line) + + # human readable comments + + FallbackDirectory._pop_section(lines) + + # content, everything remaining are fallback directories + + results = {} + + while lines: + # Example of an entry... + # + # "5.9.110.236:9030 orport=9001 id=0756B7CD4DFC8182BE23143FAC0642F515182CEB" + # " ipv6=[2a01:4f8:162:51e2::2]:9001" + # /* nickname=rueckgrat */ + # /* extrainfo=1 */ + + section = FallbackDirectory._pop_section(lines) + + if not section: + continue + + address, or_port, dir_port, fingerprint = FallbackDirectory._parse_addr(section) + + results[fingerprint] = FallbackDirectory( + address = address, + or_port = or_port, + dir_port = dir_port, + fingerprint = fingerprint, + nickname = FallbackDirectory._parse_nickname(section, fingerprint), + has_extrainfo = FallbackDirectory._parse_has_extrainfo(section), + orport_v6 = FallbackDirectory._parse_ipv6(section, fingerprint), + ) + + return results
@staticmethod - def _parse_v2(fallback_dir_page): - # Example of an entry... - # - # "5.9.110.236:9030 orport=9001 id=0756B7CD4DFC8182BE23143FAC0642F515182CEB" - # " ipv6=[2a01:4f8:162:51e2::2]:9001" - # /* nickname=rueckgrat */ - # /* extrainfo=1 */ - - results, attr = {}, {} - - for line in fallback_dir_page.splitlines(): - addr_line_match = re.match('"([\d.]+):(\d+) orport=(\d+) id=([\dA-F]{40}).*', line) - nickname_match = re.match('/\* nickname=(\S+) \*/', line) - has_extrainfo_match = re.match('/\* extrainfo=([0-1]) \*/', line) - ipv6_line_match = re.match('" ipv6=\[([\da-f:]+)\]:(\d+)"', line) - - if addr_line_match: - address, dir_port, or_port, fingerprint = addr_line_match.groups() - - if not connection.is_valid_ipv4_address(address): - raise IOError('%s has an invalid IPv4 address: %s' % (fingerprint, address)) - elif not connection.is_valid_port(or_port): - raise IOError('%s has an invalid or_port: %s' % (fingerprint, or_port)) - elif not connection.is_valid_port(dir_port): - raise IOError('%s has an invalid dir_port: %s' % (fingerprint, dir_port)) - elif not tor_tools.is_valid_fingerprint(fingerprint): - raise IOError('%s has an invalid fingerprint: %s' % (fingerprint, fingerprint)) - - attr = { - 'address': address, - 'or_port': int(or_port), - 'dir_port': int(dir_port), - 'fingerprint': fingerprint, - } - elif ipv6_line_match: - address, port = ipv6_line_match.groups() + def _pop_section(lines): + """ + Provides lines up through the next divider. This excludes lines with just a + comma since they're an artifact of these being C strings. + """
- if not connection.is_valid_ipv6_address(address): - raise IOError('%s has an invalid IPv6 address: %s' % (fingerprint, address)) - elif not connection.is_valid_port(port): - raise IOError('%s has an invalid ORPort for its IPv6 endpoint: %s' % (fingerprint, port)) + section_lines = [] + + if lines: + line = lines.pop(0) + + while lines and line != FALLBACK_DIV: + if line.strip() != ',': + section_lines.append(line)
- attr['orport_v6'] = (address, int(port)) - elif nickname_match: - nickname = nickname_match.group(1) + line = lines.pop(0) + + return section_lines + + @staticmethod + def _parse_addr(lines): + """ + Provides the mandatory address information of a fallback. + """ + + addr_match = FALLBACK_ADDR.match(lines[0]) + + if not addr_match: + raise IOError('Malformed initial fallback line:\n\n%s' % '\n'.join(lines)) + + address, dir_port, or_port, fingerprint = addr_match.groups() + + if not connection.is_valid_ipv4_address(address): + raise IOError('%s has an invalid IPv4 address: %s' % (fingerprint, address)) + elif not connection.is_valid_port(or_port): + raise IOError('%s has an invalid or_port: %s' % (fingerprint, or_port)) + elif not connection.is_valid_port(dir_port): + raise IOError('%s has an invalid dir_port: %s' % (fingerprint, dir_port)) + elif not tor_tools.is_valid_fingerprint(fingerprint): + raise IOError('%s has an invalid fingerprint: %s' % (fingerprint, fingerprint)) + + return address, int(or_port), int(dir_port), fingerprint + + @staticmethod + def _parse_nickname(lines, fingerprint): + """ + Provides the nickname of the fallback. + """ + + for line in lines: + match = FALLBACK_NICKNAME.match(line) + + if match: + nickname = match.group(1)
if not tor_tools.is_valid_nickname(nickname): raise IOError('%s has an invalid nickname: %s' % (fingerprint, nickname))
- attr['nickname'] = nickname - elif has_extrainfo_match: - attr['has_extrainfo'] = has_extrainfo_match.group(1) == '1' - - results[attr.get('fingerprint')] = FallbackDirectory( - address = attr.get('address'), - or_port = attr.get('or_port'), - dir_port = attr.get('dir_port'), - fingerprint = attr.get('fingerprint'), - nickname = attr.get('nickname'), - has_extrainfo = attr.get('has_extrainfo', False), - orport_v6 = attr.get('orport_v6'), - ) + return nickname + + @staticmethod + def _parse_has_extrainfo(lines): + for line in lines: + match = FALLBACK_EXTRAINFO.match(line)
- attr = {} + if match: + return match.group(1) == '1'
- return results + return False + + @staticmethod + def _parse_ipv6(lines, fingerprint): + """ + Provides the IPv6 ORPort for a fallback. + """ + + for line in lines: + match = FALLBACK_IPV6.match(line) + + if match: + address, port = match.groups() + + if not connection.is_valid_ipv6_address(address): + raise IOError('%s has an invalid IPv6 address: %s' % (fingerprint, address)) + elif not connection.is_valid_port(port): + raise IOError('%s has an invalid ORPort for its IPv6 endpoint: %s' % (fingerprint, port)) + + return (address, int(port))
def __hash__(self): return _hash_attr(self, 'orport_v6', parent = Directory) diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py index 414be7e8..17e17595 100644 --- a/test/unit/descriptor/remote.py +++ b/test/unit/descriptor/remote.py @@ -59,10 +59,36 @@ iO3EUE0AEYah2W9gdz8t+i3Dtr0zgqLS841GC/TyDKCm+MKmN8d098qnwK0NGF9q """
FALLBACK_DIR_CONTENT = b"""\ +/* type=fallback */ +/* version=2.0.0 */ +/* timestamp=20170526090242 */ +/* ===== */ +/* Whitelist & blacklist excluded 1326 of 1513 candidates. */ +/* Checked IPv4 DirPorts served a consensus within 15.0s. */ +/* +Final Count: 151 (Eligible 187, Target 392 (1963 * 0.20), Max 200) +Excluded: 36 (Same Operator 27, Failed/Skipped Download 9, Excess 0) +Bandwidth Range: 1.3 - 40.0 MByte/s +*/ +/* +Onionoo Source: details Date: 2017-05-16 07:00:00 Version: 4.0 +URL: https:onionoo.torproject.orgdetails?fields=fingerprint%2Cnickname%2Ccontact%2Clast_changed_address_or_port%2Cconsensus_weight%2Cadvertised_bandwidth%2Cor_addresses%2Cdir_address%2Crecommended_version%2Cflags%2Ceffective_family%2Cplatform&flag=V2Dir&type=relay&last_seen_days=-0&first_seen_days=30- +*/ +/* +Onionoo Source: uptime Date: 2017-05-16 07:00:00 Version: 4.0 +URL: https:onionoo.torproject.orguptime?first_seen_days=30-&flag=V2Dir&type=relay&last_seen_days=-0 +*/ +/* ===== */ "5.9.110.236:9030 orport=9001 id=0756B7CD4DFC8182BE23143FAC0642F515182CEB" " ipv6=[2a01:4f8:162:51e2::2]:9001" /* nickname=rueckgrat */ /* extrainfo=1 */ +/* ===== */ +, +"193.171.202.146:9030 orport=9001 id=01A9258A46E97FF8B2CAC7910577862C14F2C524" +/* nickname= */ +/* extrainfo=0 */ +/* ===== */ """
@@ -187,6 +213,15 @@ class TestDescriptorDownloader(unittest.TestCase): has_extrainfo = True, orport_v6 = ('2a01:4f8:162:51e2::2', 9001), ), + '01A9258A46E97FF8B2CAC7910577862C14F2C524': stem.descriptor.remote.FallbackDirectory( + address = '193.171.202.146', + or_port = 9001, + dir_port = 9030, + fingerprint = '01A9258A46E97FF8B2CAC7910577862C14F2C524', + nickname = None, + has_extrainfo = False, + orport_v6 = None, + ), }
self.assertEqual(expected, fallback_directories)