[tor-commits] [stem/master] Support new v2 fallback format

atagar at torproject.org atagar at torproject.org
Fri Jan 12 18:16:53 UTC 2018


commit ae6583197cceefcf88b09e5a8c05920ca3ecfe50
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Jan 7 19:03:43 2018 -0800

    Support new v2 fallback format
    
    Iterating on the fallback format has made it easier to parse and added
    header metadata. This commit adds support for the new format.
---
 docs/change_log.rst            |   1 +
 stem/descriptor/remote.py      | 228 +++++++++++++++++++++++++++--------------
 test/unit/descriptor/remote.py |  35 +++++++
 3 files changed, 189 insertions(+), 75 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index c1af4f58..2d2490b5 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -51,6 +51,7 @@ The following are only available within Stem's `git repository
 
  * **Descriptors**
 
+  * `Fallback directory v2 support <https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html>`_, which adds *nickname* and *extrainfo*
   * Reduced maximum descriptors fetched by the remote module to match tor's new limit (:trac:`24743`)
 
  * **Website**
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 13fa2893..1f002e27 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -12,7 +12,7 @@ information...
 
   for desc in stem.descriptor.remote.get_server_descriptors():
     if desc.exit_policy.is_exiting_allowed():
-      print '  %s (%s)' % (desc.nickname, desc.fingerprint)
+      print('  %s (%s)' % (desc.nickname, desc.fingerprint))
 
 More custom downloading behavior can be done through the
 :class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues
@@ -30,17 +30,17 @@ content. For example...
 
   query = downloader.get_server_descriptors()
 
-  print 'Exit Relays:'
+  print('Exit Relays:')
 
   try:
     for desc in query.run():
       if desc.exit_policy.is_exiting_allowed():
-        print '  %s (%s)' % (desc.nickname, desc.fingerprint)
+        print('  %s (%s)' % (desc.nickname, desc.fingerprint))
 
     print
-    print 'Query took %0.2f seconds' % query.runtime
+    print('Query took %0.2f seconds' % query.runtime)
   except Exception as exc:
-    print 'Unable to retrieve the server descriptors: %s' % exc
+    print('Unable to retrieve the server descriptors: %s' % exc)
 
 ::
 
@@ -112,6 +112,14 @@ MAX_MICRODESCRIPTOR_HASHES = 90
 GITWEB_FALLBACK_DIR_URL = 'https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc'
 CACHE_PATH = os.path.join(os.path.dirname(__file__), 'fallback_directories.cfg')
 
+FALLBACK_DIV = '/* ===== */'
+FALLBACK_MAPPING = re.compile('/\*\s+(\S+)=(\S*)\s+\*/')
+
+FALLBACK_ADDR = re.compile('"([\d\.]+):(\d+) orport=(\d+) id=([\dA-F]{40}).*')
+FALLBACK_NICKNAME = re.compile('/\* nickname=(\S+) \*/')
+FALLBACK_EXTRAINFO = re.compile('/\* extrainfo=([0-1]) \*/')
+FALLBACK_IPV6 = re.compile('" ipv6=\[([\da-f:]+)\]:(\d+)"')
+
 SINGLETON_DOWNLOADER = None
 
 
@@ -215,22 +223,22 @@ class Query(object):
       timeout = 30,
     )
 
-    print 'Current relays:'
+    print('Current relays:')
 
     if not query.error:
       for desc in query:
-        print desc.fingerprint
+        print(desc.fingerprint)
     else:
-      print 'Unable to retrieve the server descriptors: %s' % query.error
+      print('Unable to retrieve the server descriptors: %s' % query.error)
 
   ... while iterating fails silently...
 
   ::
 
-    print 'Current relays:'
+    print('Current relays:')
 
     for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'):
-      print desc.fingerprint
+      print(desc.fingerprint)
 
   In either case exceptions are available via our 'error' attribute.
 
@@ -568,7 +576,7 @@ class DescriptorDownloader(object):
 
     return self.query(resource, **query_args)
 
-  # TODO: drop in python 2.x
+  # TODO: drop in stem 2.x
 
   def get_microdescriptors(self, hashes, **query_args):
     """
@@ -940,10 +948,8 @@ class FallbackDirectory(Directory):
   .. versionadded:: 1.5.0
 
   .. versionchanged:: 1.7.0
-     Added the nickname and has_extrainfo attributes.
-
-  .. versionchanged:: 1.7.0
-     Support for parsing `second version of the fallback directories
+     Added the nickname and has_extrainfo attributes, which are part of the
+     `second version of the fallback directories
      <https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html>`_.
 
   :var str nickname: relay nickname
@@ -1044,80 +1050,152 @@ class FallbackDirectory(Directory):
     """
 
     try:
-      fallback_dir_page = str_tools._to_unicode(urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout = timeout).read())
+      lines = str_tools._to_unicode(urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout = timeout).read()).splitlines()
     except:
       exc = sys.exc_info()[1]
       raise IOError("Unable to download tor's fallback directories from %s: %s" % (GITWEB_FALLBACK_DIR_URL, exc))
 
-    return FallbackDirectory._parse_v2(fallback_dir_page)
+    if not lines:
+      raise IOError('%s did not have any content' % GITWEB_FALLBACK_DIR_URL)
+    elif lines[0] != '/* type=fallback */':
+      raise IOError('%s does not have a type field indicating it is fallback directory metadata' % GITWEB_FALLBACK_DIR_URL)
+
+    # header metadata
+
+    header = {}
+
+    for line in FallbackDirectory._pop_section(lines):
+      mapping = FALLBACK_MAPPING.match(line)
+
+      if mapping:
+        header[mapping.group(1)] = mapping.group(2)
+      else:
+        raise IOError('Malformed fallback directory header line: %s' % line)
+
+    # human readable comments
+
+    FallbackDirectory._pop_section(lines)
+
+    # content, everything remaining are fallback directories
+
+    results = {}
+
+    while lines:
+      # Example of an entry...
+      #
+      #   "5.9.110.236:9030 orport=9001 id=0756B7CD4DFC8182BE23143FAC0642F515182CEB"
+      #   " ipv6=[2a01:4f8:162:51e2::2]:9001"
+      #   /* nickname=rueckgrat */
+      #   /* extrainfo=1 */
+
+      section = FallbackDirectory._pop_section(lines)
+
+      if not section:
+        continue
+
+      address, or_port, dir_port, fingerprint = FallbackDirectory._parse_addr(section)
+
+      results[fingerprint] = FallbackDirectory(
+        address = address,
+        or_port = or_port,
+        dir_port = dir_port,
+        fingerprint = fingerprint,
+        nickname = FallbackDirectory._parse_nickname(section, fingerprint),
+        has_extrainfo = FallbackDirectory._parse_has_extrainfo(section),
+        orport_v6 = FallbackDirectory._parse_ipv6(section, fingerprint),
+      )
+
+    return results
 
   @staticmethod
-  def _parse_v2(fallback_dir_page):
-    # Example of an entry...
-    #
-    #   "5.9.110.236:9030 orport=9001 id=0756B7CD4DFC8182BE23143FAC0642F515182CEB"
-    #   " ipv6=[2a01:4f8:162:51e2::2]:9001"
-    #   /* nickname=rueckgrat */
-    #   /* extrainfo=1 */
-
-    results, attr = {}, {}
-
-    for line in fallback_dir_page.splitlines():
-      addr_line_match = re.match('"([\d\.]+):(\d+) orport=(\d+) id=([\dA-F]{40}).*', line)
-      nickname_match = re.match('/\* nickname=(\S+) \*/', line)
-      has_extrainfo_match = re.match('/\* extrainfo=([0-1]) \*/', line)
-      ipv6_line_match = re.match('" ipv6=\[([\da-f:]+)\]:(\d+)"', line)
-
-      if addr_line_match:
-        address, dir_port, or_port, fingerprint = addr_line_match.groups()
-
-        if not connection.is_valid_ipv4_address(address):
-          raise IOError('%s has an invalid IPv4 address: %s' % (fingerprint, address))
-        elif not connection.is_valid_port(or_port):
-          raise IOError('%s has an invalid or_port: %s' % (fingerprint, or_port))
-        elif not connection.is_valid_port(dir_port):
-          raise IOError('%s has an invalid dir_port: %s' % (fingerprint, dir_port))
-        elif not tor_tools.is_valid_fingerprint(fingerprint):
-          raise IOError('%s has an invalid fingerprint: %s' % (fingerprint, fingerprint))
-
-        attr = {
-          'address': address,
-          'or_port': int(or_port),
-          'dir_port': int(dir_port),
-          'fingerprint': fingerprint,
-        }
-      elif ipv6_line_match:
-        address, port = ipv6_line_match.groups()
+  def _pop_section(lines):
+    """
+    Provides lines up through the next divider. This excludes lines with just a
+    comma since they're an artifact of these being C strings.
+    """
 
-        if not connection.is_valid_ipv6_address(address):
-          raise IOError('%s has an invalid IPv6 address: %s' % (fingerprint, address))
-        elif not connection.is_valid_port(port):
-          raise IOError('%s has an invalid ORPort for its IPv6 endpoint: %s' % (fingerprint, port))
+    section_lines = []
+
+    if lines:
+      line = lines.pop(0)
+
+      while lines and line != FALLBACK_DIV:
+        if line.strip() != ',':
+          section_lines.append(line)
 
-        attr['orport_v6'] = (address, int(port))
-      elif nickname_match:
-        nickname = nickname_match.group(1)
+        line = lines.pop(0)
+
+    return section_lines
+
+  @staticmethod
+  def _parse_addr(lines):
+    """
+    Provides the mandatory address information of a fallback.
+    """
+
+    addr_match = FALLBACK_ADDR.match(lines[0])
+
+    if not addr_match:
+      raise IOError('Malformed initial fallback line:\n\n%s' % '\n'.join(lines))
+
+    address, dir_port, or_port, fingerprint = addr_match.groups()
+
+    if not connection.is_valid_ipv4_address(address):
+      raise IOError('%s has an invalid IPv4 address: %s' % (fingerprint, address))
+    elif not connection.is_valid_port(or_port):
+      raise IOError('%s has an invalid or_port: %s' % (fingerprint, or_port))
+    elif not connection.is_valid_port(dir_port):
+      raise IOError('%s has an invalid dir_port: %s' % (fingerprint, dir_port))
+    elif not tor_tools.is_valid_fingerprint(fingerprint):
+      raise IOError('%s has an invalid fingerprint: %s' % (fingerprint, fingerprint))
+
+    return address, int(or_port), int(dir_port), fingerprint
+
+  @staticmethod
+  def _parse_nickname(lines, fingerprint):
+    """
+    Provides the nickname of the fallback.
+    """
+
+    for line in lines:
+      match = FALLBACK_NICKNAME.match(line)
+
+      if match:
+        nickname = match.group(1)
 
         if not tor_tools.is_valid_nickname(nickname):
           raise IOError('%s has an invalid nickname: %s' % (fingerprint, nickname))
 
-        attr['nickname'] = nickname
-      elif has_extrainfo_match:
-        attr['has_extrainfo'] = has_extrainfo_match.group(1) == '1'
-
-        results[attr.get('fingerprint')] = FallbackDirectory(
-          address = attr.get('address'),
-          or_port = attr.get('or_port'),
-          dir_port = attr.get('dir_port'),
-          fingerprint = attr.get('fingerprint'),
-          nickname = attr.get('nickname'),
-          has_extrainfo = attr.get('has_extrainfo', False),
-          orport_v6 = attr.get('orport_v6'),
-        )
+        return nickname
+
+  @staticmethod
+  def _parse_has_extrainfo(lines):
+    for line in lines:
+      match = FALLBACK_EXTRAINFO.match(line)
 
-        attr = {}
+      if match:
+        return match.group(1) == '1'
 
-    return results
+    return False
+
+  @staticmethod
+  def _parse_ipv6(lines, fingerprint):
+    """
+    Provides the IPv6 ORPort for a fallback.
+    """
+
+    for line in lines:
+      match = FALLBACK_IPV6.match(line)
+
+      if match:
+        address, port = match.groups()
+
+        if not connection.is_valid_ipv6_address(address):
+          raise IOError('%s has an invalid IPv6 address: %s' % (fingerprint, address))
+        elif not connection.is_valid_port(port):
+          raise IOError('%s has an invalid ORPort for its IPv6 endpoint: %s' % (fingerprint, port))
+
+        return (address, int(port))
 
   def __hash__(self):
     return _hash_attr(self, 'orport_v6', parent = Directory)
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index 414be7e8..17e17595 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -59,10 +59,36 @@ iO3EUE0AEYah2W9gdz8t+i3Dtr0zgqLS841GC/TyDKCm+MKmN8d098qnwK0NGF9q
 """
 
 FALLBACK_DIR_CONTENT = b"""\
+/* type=fallback */
+/* version=2.0.0 */
+/* timestamp=20170526090242 */
+/* ===== */
+/* Whitelist & blacklist excluded 1326 of 1513 candidates. */
+/* Checked IPv4 DirPorts served a consensus within 15.0s. */
+/*
+Final Count: 151 (Eligible 187, Target 392 (1963 * 0.20), Max 200)
+Excluded: 36 (Same Operator 27, Failed/Skipped Download 9, Excess 0)
+Bandwidth Range: 1.3 - 40.0 MByte/s
+*/
+/*
+Onionoo Source: details Date: 2017-05-16 07:00:00 Version: 4.0
+URL: https:onionoo.torproject.orgdetails?fields=fingerprint%2Cnickname%2Ccontact%2Clast_changed_address_or_port%2Cconsensus_weight%2Cadvertised_bandwidth%2Cor_addresses%2Cdir_address%2Crecommended_version%2Cflags%2Ceffective_family%2Cplatform&flag=V2Dir&type=relay&last_seen_days=-0&first_seen_days=30-
+*/
+/*
+Onionoo Source: uptime Date: 2017-05-16 07:00:00 Version: 4.0
+URL: https:onionoo.torproject.orguptime?first_seen_days=30-&flag=V2Dir&type=relay&last_seen_days=-0
+*/
+/* ===== */
 "5.9.110.236:9030 orport=9001 id=0756B7CD4DFC8182BE23143FAC0642F515182CEB"
 " ipv6=[2a01:4f8:162:51e2::2]:9001"
 /* nickname=rueckgrat */
 /* extrainfo=1 */
+/* ===== */
+,
+"193.171.202.146:9030 orport=9001 id=01A9258A46E97FF8B2CAC7910577862C14F2C524"
+/* nickname= */
+/* extrainfo=0 */
+/* ===== */
 """
 
 
@@ -187,6 +213,15 @@ class TestDescriptorDownloader(unittest.TestCase):
         has_extrainfo = True,
         orport_v6 = ('2a01:4f8:162:51e2::2', 9001),
       ),
+      '01A9258A46E97FF8B2CAC7910577862C14F2C524': stem.descriptor.remote.FallbackDirectory(
+        address = '193.171.202.146',
+        or_port = 9001,
+        dir_port = 9030,
+        fingerprint = '01A9258A46E97FF8B2CAC7910577862C14F2C524',
+        nickname = None,
+        has_extrainfo = False,
+        orport_v6 = None,
+      ),
     }
 
     self.assertEqual(expected, fallback_directories)





More information about the tor-commits mailing list