[tor-commits] [stem/master] Add DirectoryAuthority.from_remote()

atagar at torproject.org atagar at torproject.org
Thu May 3 22:22:59 UTC 2018


commit 4dfb17200a902ef7c4f4e075da36c833d850be76
Author: Damian Johnson <atagar at torproject.org>
Date:   Thu May 3 14:51:31 2018 -0700

    Add DirectoryAuthority.from_remote()
    
    Recently teor split tor's directory authority information into its own file,
    simplifying parsing a great deal...
    
      https://gitweb.torproject.org/tor.git/plain/src/or/auth_dirs.inc
    
    Like fallbacks, adding a DirectoryAuthority.from_remote() function which
    downloads and parses tor's current authority data. This also makes a
    'is Stem's cached dirauth data up to date' test a cinch. ;P
    
    While whipping this up I found a handful of additional things I'd like to
    follow this up with...
    
      * Our stem.descriptor.remote module has grown way too large. When I first
        added directory authorities it was just a small constant. But now with
        parsing and fallback directories our module's 1707 lines. Way past the
        point where we should split things up.
    
        I'll move dirauths and fallbacks to their own files, with aliases for
        temporary backward compatibility (those aliases will be dropped in
        Stem 2.0).
    
      * This new parser needs unit test coverage. Probably similar to our tests for
        FallbackDirectory.from_remote().
    
      * We should replace our get_authorities() function with
        DirectoryAuthority.from_cache() for consistency with the
        FallbackDirectory and Manual classes.
    
      * Now that dirauths note IPv6 ORPorts we should move that attribute from the
        FallbackDirectory class up to the common Directory parent.
---
 stem/descriptor/remote.py       | 163 +++++++++++++++++++++++++++++++++++++---
 test/integ/descriptor/remote.py |  30 +++++++-
 test/unit/descriptor/remote.py  |   4 +-
 3 files changed, 183 insertions(+), 14 deletions(-)

diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 480297f1..a656e354 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -167,9 +167,15 @@ LZMA_UNAVAILABLE_MSG = 'LZMA compression requires the lzma module (https://docs.
 MAX_FINGERPRINTS = 96
 MAX_MICRODESCRIPTOR_HASHES = 90
 
-GITWEB_FALLBACK_DIR_URL = 'https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc'
+GITWEB_AUTHORITY_URL = 'https://gitweb.torproject.org/tor.git/plain/src/or/auth_dirs.inc'
+GITWEB_FALLBACK_URL = 'https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc'
 CACHE_PATH = os.path.join(os.path.dirname(__file__), 'fallback_directories.cfg')
 
+AUTHORITY_NAME = re.compile('"(\S+) orport=(\d+) .*"')
+AUTHORITY_V3IDENT = re.compile('"v3ident=([\dA-F]{40}) "')
+AUTHORITY_IPV6 = re.compile('"ipv6=\[([\da-f:]+)\]:(\d+) "')
+AUTHORITY_ADDR = re.compile('"([\d\.]+):(\d+) ([\dA-F ]{49})",')
+
 FALLBACK_DIV = '/* ===== */'
 FALLBACK_MAPPING = re.compile('/\*\s+(\S+)=(\S*)\s+\*/')
 
@@ -1088,6 +1094,143 @@ class DirectoryAuthority(Directory):
     self.v3ident = v3ident
     self.is_bandwidth_authority = is_bandwidth_authority
 
+  @staticmethod
+  def from_remote(timeout = 60):
+    """
+    Reads and parses tor's latest directory authority data `from
+    gitweb.torproject.org
+    <https://gitweb.torproject.org/tor.git/plain/src/or/auth_dirs.inc>`_.
+    Note that while convenient, this reliance on GitWeb means you should always
+    call with a fallback, such as...
+
+    ::
+
+      try:
+        fallback_directories = DirectoryAuthority.from_remote()
+      except IOError:
+        fallback_directories = get_authorities()
+
+    Authorities provided through this method will not have a
+    **is_bandwidth_authority** value set.
+
+    .. versionadded:: 1.7.0
+
+    :param int timeout: seconds to wait before timing out the request
+
+    :returns: **dict** of **str** nicknames to their
+      :class:`~stem.descriptor.remote.DirectoryAuthority`
+
+    :raises: **IOError** if unable to retrieve the directory authorities
+    """
+
+    try:
+      lines = str_tools._to_unicode(urllib.urlopen(GITWEB_AUTHORITY_URL, timeout = timeout).read()).splitlines()
+    except:
+      exc = sys.exc_info()[1]
+      raise IOError("Unable to download tor's directory authorities from %s: %s" % (GITWEB_AUTHORITY_URL, exc))
+
+    if not lines:
+      raise IOError('%s did not have any content' % GITWEB_AUTHORITY_URL)
+
+    results = {}
+
+    while lines:
+      section = DirectoryAuthority._pop_section(lines)
+
+      if section:
+        try:
+          authority = DirectoryAuthority._from_str('\n'.join(section))
+          results[authority.nickname] = authority
+        except ValueError as exc:
+          raise IOError(str(exc))
+
+    return results
+
+  @staticmethod
+  def _from_str(content):
+    """
+    Parses authority from its textual representation. For example...
+
+    ::
+
+      "moria1 orport=9101 "
+        "v3ident=D586D18309DED4CD6D57C18FDB97EFA96D330566 "
+        "128.31.0.39:9131 9695 DFC3 5FFE B861 329B 9F1A B04C 4639 7020 CE31",
+
+    :param str content: text to parse
+
+    :returns: :class:`~stem.descriptor.remote.DirectoryAuthority` in the text
+
+    :raises: **ValueError** if content is malformed
+    """
+
+    if isinstance(content, bytes):
+      content = str_tools._to_unicode(content)
+
+    matches = {}
+
+    for line in content.splitlines():
+      for matcher in (AUTHORITY_NAME, AUTHORITY_V3IDENT, AUTHORITY_IPV6, AUTHORITY_ADDR):
+        m = matcher.match(line.strip())
+
+        if m:
+          match_groups = m.groups()
+          matches[matcher] = match_groups if len(match_groups) > 1 else match_groups[0]
+
+    if AUTHORITY_NAME not in matches:
+      raise ValueError('Unable to parse the name and orport from:\n\n%s' % content)
+    elif AUTHORITY_ADDR not in matches:
+      raise ValueError('Unable to parse the address and fingerprint from:\n\n%s' % content)
+
+    nickname, or_port = matches.get(AUTHORITY_NAME)
+    v3ident = matches.get(AUTHORITY_V3IDENT)
+    orport_v6 = matches.get(AUTHORITY_IPV6)  # TODO: add this to stem's data?
+    address, dir_port, fingerprint = matches.get(AUTHORITY_ADDR)
+
+    fingerprint = fingerprint.replace(' ', '')
+
+    if not connection.is_valid_ipv4_address(address):
+      raise ValueError('%s has an invalid IPv4 address: %s' % (nickname, address))
+    elif not connection.is_valid_port(or_port):
+      raise ValueError('%s has an invalid or_port: %s' % (nickname, or_port))
+    elif not connection.is_valid_port(dir_port):
+      raise ValueError('%s has an invalid dir_port: %s' % (nickname, dir_port))
+    elif not tor_tools.is_valid_fingerprint(fingerprint):
+      raise ValueError('%s has an invalid fingerprint: %s' % (nickname, fingerprint))
+    elif nickname and not tor_tools.is_valid_nickname(nickname):
+      raise ValueError('%s has an invalid nickname: %s' % (nickname, nickname))
+    elif orport_v6 and not connection.is_valid_ipv6_address(orport_v6[0]):
+      raise ValueError('%s has an invalid IPv6 address: %s' % (nickname, orport_v6[0]))
+    elif orport_v6 and not connection.is_valid_port(orport_v6[1]):
+      raise ValueError('%s has an invalid ORPort for its IPv6 endpoint: %s' % (nickname, orport_v6[1]))
+    elif v3ident and not tor_tools.is_valid_fingerprint(v3ident):
+      raise ValueError('%s has an invalid v3ident: %s' % (nickname, v3ident))
+
+    return DirectoryAuthority(
+      address = address,
+      or_port = int(or_port),
+      dir_port = int(dir_port),
+      fingerprint = fingerprint,
+      nickname = nickname,
+      v3ident = v3ident,
+    )
+
+  @staticmethod
+  def _pop_section(lines):
+    """
+    Provides the next authority entry.
+    """
+
+    section_lines = []
+
+    if lines:
+      section_lines.append(lines.pop(0))
+
+      while lines and lines[0].startswith(' '):
+        section_lines.append(lines.pop(0))
+
+    return section_lines
+
   def __hash__(self):
     return _hash_attr(self, 'nickname', 'v3ident', 'is_bandwidth_authority', parent = Directory)
 
@@ -1333,9 +1476,9 @@ class FallbackDirectory(Directory):
     ::
 
       try:
-        fallback_directories = stem.descriptor.remote.from_remote()
+        fallback_directories = FallbackDirectory.from_remote()
       except IOError:
-        fallback_directories = stem.descriptor.remote.from_cache()
+        fallback_directories = FallbackDirectory.from_cache()
 
     :param int timeout: seconds to wait before timing out the request
 
@@ -1346,15 +1489,15 @@ class FallbackDirectory(Directory):
     """
 
     try:
-      lines = str_tools._to_unicode(urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout = timeout).read()).splitlines()
+      lines = str_tools._to_unicode(urllib.urlopen(GITWEB_FALLBACK_URL, timeout = timeout).read()).splitlines()
     except:
       exc = sys.exc_info()[1]
-      raise IOError("Unable to download tor's fallback directories from %s: %s" % (GITWEB_FALLBACK_DIR_URL, exc))
+      raise IOError("Unable to download tor's fallback directories from %s: %s" % (GITWEB_FALLBACK_URL, exc))
 
     if not lines:
-      raise IOError('%s did not have any content' % GITWEB_FALLBACK_DIR_URL)
+      raise IOError('%s did not have any content' % GITWEB_FALLBACK_URL)
     elif lines[0] != '/* type=fallback */':
-      raise IOError('%s does not have a type field indicating it is fallback directory metadata' % GITWEB_FALLBACK_DIR_URL)
+      raise IOError('%s does not have a type field indicating it is fallback directory metadata' % GITWEB_FALLBACK_URL)
 
     # header metadata
 
@@ -1381,7 +1524,7 @@ class FallbackDirectory(Directory):
 
       if section:
         try:
-          fallback = FallbackDirectory.from_str('\n'.join(section))
+          fallback = FallbackDirectory._from_str('\n'.join(section))
           fallback.header = header
           results[fallback.fingerprint] = fallback
         except ValueError as exc:
@@ -1390,7 +1533,7 @@ class FallbackDirectory(Directory):
     return results
 
   @staticmethod
-  def from_str(content):
+  def _from_str(content):
     """
     Parses a fallback from its textual representation. For example...
 
@@ -1401,8 +1544,6 @@ class FallbackDirectory(Directory):
       /* nickname=rueckgrat */
       /* extrainfo=1 */
 
-    .. versionadded:: 1.7.0
-
     :param str content: text to parse
 
     :returns: :class:`~stem.descriptor.remote.FallbackDirectory` in the text
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index f56e1b02..b3553598 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -247,7 +247,35 @@ class TestDescriptorDownloader(unittest.TestCase):
     self.assertEqual(2, len(list(multiple_query)))
 
   @test.require.online
-  def test_that_cache_is_up_to_date(self):
+  def test_authority_cache_is_up_to_date(self):
+    """
+    Check if the cached authorities bundled with Stem are up to date or not.
+    """
+
+    cached_authorities = stem.descriptor.remote.get_authorities()
+    latest_authorities = stem.descriptor.remote.DirectoryAuthority.from_remote()
+
+    for nickname in cached_authorities:
+      if nickname not in latest_authorities:
+        self.fail('%s is no longer a directory authority in tor' % nickname)
+
+    for nickname in latest_authorities:
+      if nickname not in cached_authorities:
+        self.fail('%s is now a directory authority in tor' % nickname)
+
+    # tor doesn't note if an authority is a bwauth or not, so we need to exclude
+    # that from our comparison
+
+    for attr in ('address', 'or_port', 'dir_port', 'fingerprint', 'nickname', 'v3ident'):
+      for auth in cached_authorities.values():
+        cached_value = getattr(auth, attr)
+        latest_value = getattr(latest_authorities[auth.nickname], attr)
+
+        if cached_value != latest_value:
+          self.fail('The %s of the %s authority is %s in tor but %s in stem' % (attr, auth.nickname, latest_value, cached_value))
+
+  @test.require.online
+  def test_fallback_cache_is_up_to_date(self):
     """
     Check if the cached fallback directories bundled with Stem are up to date
     or not.
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index 753681e0..a02a1171 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -535,7 +535,7 @@ class TestDescriptorDownloader(unittest.TestCase):
       orport_v6 = ('2a01:4f8:162:51e2::2', 9001),
     )
 
-    self.assertEqual(expected, stem.descriptor.remote.FallbackDirectory.from_str(FALLBACK_ENTRY))
+    self.assertEqual(expected, stem.descriptor.remote.FallbackDirectory._from_str(FALLBACK_ENTRY))
 
   def test_fallback_directories_from_str_malformed(self):
     test_values = {
@@ -548,4 +548,4 @@ class TestDescriptorDownloader(unittest.TestCase):
     }
 
     for entry, expected in test_values.items():
-      self.assertRaisesRegexp(ValueError, expected, stem.descriptor.remote.FallbackDirectory.from_str, entry)
+      self.assertRaisesRegexp(ValueError, expected, stem.descriptor.remote.FallbackDirectory._from_str, entry)



More information about the tor-commits mailing list