[tor-commits] [stem/master] Class for issuing remote descriptor queries

atagar at torproject.org atagar at torproject.org
Mon Jul 22 03:10:17 UTC 2013


commit 762f0ad396dd752f0f807871e0715c142cdbf475
Author: Damian Johnson <atagar at torproject.org>
Date:   Fri Jul 5 19:18:02 2013 -0700

    Class for issuing remote descriptor queries
    
    Adding a Query class which does most of the heavy lifting for downloading
    descriptors.
---
 stem/control.py                           |    4 +-
 stem/descriptor/__init__.py               |    1 +
 stem/descriptor/remote.py                 |  174 ++++++++++++++++++++++++++++-
 stem/descriptor/server_descriptor.py      |    3 +
 test/settings.cfg                         |    1 +
 test/unit/descriptor/remote.py            |  126 +++++++++++++++++++++
 test/unit/descriptor/server_descriptor.py |    8 +-
 7 files changed, 308 insertions(+), 9 deletions(-)

diff --git a/stem/control.py b/stem/control.py
index 8a8f410..0f589e1 100644
--- a/stem/control.py
+++ b/stem/control.py
@@ -1255,8 +1255,8 @@ class Controller(BaseController):
 
     :raises:
       * :class:`stem.ControllerError` if unable to query the descriptor
-      * **ValueError** if **relay** doesn't conform with the patter for being a
-        fingerprint or nickname
+      * **ValueError** if **relay** doesn't conform with the pattern for being
+        a fingerprint or nickname
 
       An exception is only raised if we weren't provided a default response.
     """
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index dc982f4..a527329 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -137,6 +137,7 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
   :returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file
 
   :raises:
+    * **ValueError** if the contents are malformed and validate is True
     * **TypeError** if we can't match the contents of the file to a descriptor type
     * **IOError** if unable to read from the descriptor_file
   """
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index ea8dbb3..1f28bbd 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -4,8 +4,9 @@
 """
 Utilities for retrieving descriptors from directory authorities and mirrors.
 This is mostly done through the
-:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues the
-requests and provides back parsed content. For example...
+:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues
+:class:`~stem.descriptor.remote.Query` to get descriptor content. For
+example...
 
 ::
 
@@ -14,13 +15,174 @@ requests and provides back parsed content. For example...
     use_mirrors = True,
   )
 
+  query = downloader.get_server_descriptors()
+
+  print "Exit Relays:"
+
   try:
-    for desc in downloader.get_server_descriptors():
+    for desc in query.run():
       if desc.exit_policy.is_exiting_allowed():
-        print "%s (%s)" % (desc.nickname, desc.fingerprint)
-  except IOError, exc:
-    print "Unable to query the server descriptors: %s" % exc
+        print "  %s (%s)" % (desc.nickname, desc.fingerprint)
+
+    print
+    print "Query took %0.2f seconds" % query.runtime
+  except Exception as exc:
+    print "Unable to query the server descriptors: %s" % query.error
+
+If you don't care about errors then you can also simply iterate over the query
+itself...
+
+::
+
+  for desc in downloader.get_server_descriptors():
+    if desc.exit_policy.is_exiting_allowed():
+      print "  %s (%s)" % (desc.nickname, desc.fingerprint)
 """
 
+import sys
+import threading
+import time
+import urllib2
+
+import stem.descriptor
+
+# Tor directory authorities as of commit f631b73 (7/4/13):
+# https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816
+
+DIRECTORY_AUTHORITIES = {
+  'moria1': ('128.31.0.39', 9131),
+  'tor26': ('86.59.21.38', 80),
+  'dizum': ('194.109.206.212', 80),
+  'Tonga': ('82.94.251.203', 80),
+  'turtles': ('76.73.17.194', 9030),
+  'gabelmoo': ('212.112.245.170', 80),
+  'dannenberg': ('193.23.244.244', 80),
+  'urras': ('208.83.223.34', 443),
+  'maatuska': ('171.25.193.9', 443),
+  'Faravahar': ('154.35.32.5', 80),
+}
+
+
+class Query(object):
+  """
+  Asynchronous request for descriptor content from a directory authority or
+  mirror. The caller can block on the response by either calling
+  :func:`~stem.descriptor.remote.Query.run` or iterating over our descriptor content.
+
+  :var str address: address of the authority or mirror we're querying
+  :var int port: directory port we're querying
+  :var str resource: resource being fetched, such as '/tor/status-vote/current/consensus.z'
+
+  :var Exception error: exception if a problem occurred
+  :var bool is_done: flag that indicates if our request has finished
+  :var str descriptor_type: type of descriptors being fetched, see :func:`~stem.descriptor.__init__.parse_file`
+
+  :var float start_time: unix timestamp when we first started running
+  :var float timeout: duration before we'll time out our request
+  :var float runtime: time our query took, this is **None** if it's not yet finished
+  """
+
+  def __init__(self, address, port, resource, descriptor_type, timeout = None, start = True):
+    self.address = address
+    self.port = port
+    self.resource = resource
+
+    self.error = None
+    self.is_done = False
+    self.descriptor_type = descriptor_type
+
+    self.start_time = None
+    self.timeout = timeout
+    self.runtime = None
+
+    self._downloader_thread = None
+    self._downloader_thread_lock = threading.RLock()
+
+    self._results = None  # descriptor iterator
+
+    if start:
+      self.start()
+
+  def get_url(self):
+    """
+    Provides the url being queried.
+
+    :returns: **str** for the url being queried by this request
+    """
+
+    return "http://%s:%i/%s" % (self.address, self.port, self.resource.lstrip('/'))
+
+  def start(self):
+    """
+    Starts downloading the descriptors if we haven't started already.
+    """
+
+    with self._downloader_thread_lock:
+      if self._downloader_thread is None:
+        self._downloader_thread = threading.Thread(target = self._download_descriptors, name="Descriptor Query")
+        self._downloader_thread.setDaemon(True)
+        self._downloader_thread.start()
+
+  def run(self, suppress = False):
+    """
+    Blocks until our request is complete then provides the descriptors. If we
+    haven't yet started our request then this does so.
+
+    :param bool suppress: avoids raising exceptions if **True**
+
+    :returns: iterator for the requested :class:`~stem.descriptor.__init__.Descriptor` instances
+
+    :raises:
+      Using the iterator can fail with the following if **suppress** is
+      **False**...
+
+        * **ValueError** if the descriptor contents are malformed
+        * **socket.timeout** if our request timed out
+        * **urllib2.URLError** for most request failures
+
+      Note that the urllib2 module may fail with other exception types, in
+      which case we'll pass it along.
+    """
+
+    with self._downloader_thread_lock:
+      self.start()
+      self._downloader_thread.join()
+
+      if self.error:
+        if not suppress:
+          raise self.error
+      else:
+        if self._results is None:
+          if not suppress:
+            raise ValueError('BUG: _download_descriptors() finished without either results or an error')
+
+          return
+
+        try:
+          for desc in self._results:
+            yield desc
+        except ValueError as exc:
+          self.error = exc
+
+          if not suppress:
+            raise self.error
+
+  def __iter__(self):
+    for desc in self.run(True):
+      yield desc
+
+  def _download_descriptors(self):
+    try:
+      self.start_time = time.time()
+      response = urllib2.urlopen(self.get_url(), timeout = self.timeout)
+      self.runtime = time.time() - self.start_time
+
+      self._results = stem.descriptor.parse_file(response, self.descriptor_type)
+    except:
+      self.error = sys.exc_info()[1]
+    finally:
+      self.is_done = True
+
+
 class DescriptorDownloader(object):
   pass
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 4af4976..d23f41e 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -136,6 +136,9 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
       else:
         yield RelayDescriptor(descriptor_text, validate, annotations)
     else:
+      if validate and annotations:
+        raise ValueError('Content conform to being a server descriptor:\n%s' % '\n'.join(annotations))
+
       break  # done parsing descriptors
 
 
diff --git a/test/settings.cfg b/test/settings.cfg
index d5c0997..68b120d 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -156,6 +156,7 @@ test.unit_tests
 |test.unit.util.tor_tools.TestTorTools
 |test.unit.descriptor.export.TestExport
 |test.unit.descriptor.reader.TestDescriptorReader
+|test.unit.descriptor.remote.TestDescriptorDownloader
 |test.unit.descriptor.server_descriptor.TestServerDescriptor
 |test.unit.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor
 |test.unit.descriptor.microdescriptor.TestMicrodescriptor
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
new file mode 100644
index 0000000..9e8b5cc
--- /dev/null
+++ b/test/unit/descriptor/remote.py
@@ -0,0 +1,126 @@
+"""
+Unit tests for stem.descriptor.remote.
+"""
+
+import io
+import socket
+import unittest
+
+import stem.descriptor.remote
+
+from mock import patch
+
+# Output from requesting moria1's descriptor from itself...
+# % curl http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31
+
+TEST_DESCRIPTOR = b"""\
+router moria1 128.31.0.34 9101 0 9131
+platform Tor 0.2.5.0-alpha-dev on Linux
+protocols Link 1 2 Circuit 1
+published 2013-07-05 23:48:52
+fingerprint 9695 DFC3 5FFE B861 329B 9F1A B04C 4639 7020 CE31
+uptime 1818933
+bandwidth 512000 62914560 1307929
+extra-info-digest 17D0142F6EBCDF60160EB1794FA6C9717D581F8C
+caches-extra-info
+onion-key
+-----BEGIN RSA PUBLIC KEY-----
+MIGJAoGBALzd4bhz1usB7wpoaAvP+BBOnNIk7mByAKV6zvyQ0p1M09oEmxPMc3qD
+AAm276oJNf0eq6KWC6YprzPWFsXEIdXSqA6RWXCII1JG/jOoy6nt478BkB8TS9I9
+1MJW27ppRaqnLiTmBmM+qzrsgJGwf+onAgUKKH2GxlVgahqz8x6xAgMBAAE=
+-----END RSA PUBLIC KEY-----
+signing-key
+-----BEGIN RSA PUBLIC KEY-----
+MIGJAoGBALtJ9uD7cD7iHjqNA3AgsX9prES5QN+yFQyr2uOkxzhvunnaf6SNhzWW
+bkfylnMrRm/qCz/czcjZO6N6EKHcXmypehvP566B7gAQ9vDsb+l7VZVWgXvzNc2s
+tl3P7qpC08rgyJh1GqmtQTCesIDqkEyWxwToympCt09ZQRq+fIttAgMBAAE=
+-----END RSA PUBLIC KEY-----
+hidden-service-dir
+contact 1024D/28988BF5 arma mit edu
+ntor-onion-key 9ZVjNkf/iLEnD685SpC5kcDytQ7u5ViiI9JOftdbE0k=
+reject *:*
+router-signature
+-----BEGIN SIGNATURE-----
+Y8Tj2e7mPbFJbguulkPEBVYzyO57p4btpWEXvRMD6vxIh/eyn25pehg5dUVBtZlL
+iO3EUE0AEYah2W9gdz8t+i3Dtr0zgqLS841GC/TyDKCm+MKmN8d098qnwK0NGF9q
+01NZPuSqXM1b6hnl2espFzL7XL8XEGRU+aeg+f/ukw4=
+-----END SIGNATURE-----
+"""
+
+
+class TestDescriptorDownloader(unittest.TestCase):
+  @patch('urllib2.urlopen')
+  def test_query_download(self, urlopen_mock):
+    """
+    Check Query functionality when we successfully download a descriptor.
+    """
+
+    urlopen_mock.return_value = io.BytesIO(TEST_DESCRIPTOR)
+
+    query = stem.descriptor.remote.Query(
+      '128.31.0.39',
+      9131,
+      '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31',
+      'server-descriptor 1.0',
+    )
+
+    expeced_url = 'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31'
+    self.assertEqual(expeced_url, query.get_url())
+
+    descriptors = list(query)
+    self.assertEqual(1, len(descriptors))
+    desc = descriptors[0]
+
+    self.assertEqual('moria1', desc.nickname)
+    self.assertEqual('128.31.0.34', desc.address)
+    self.assertEqual('9695DFC35FFEB861329B9F1AB04C46397020CE31', desc.fingerprint)
+    self.assertEqual(TEST_DESCRIPTOR, desc.get_bytes())
+
+    urlopen_mock.assert_called_once_with(expeced_url, timeout = None)
+
+  @patch('urllib2.urlopen')
+  def test_query_with_malformed_content(self, urlopen_mock):
+    """
+    Query with malformed descriptor content.
+    """
+
+    descriptor_content = b'some malformed stuff'
+    urlopen_mock.return_value = io.BytesIO(descriptor_content)
+
+    query = stem.descriptor.remote.Query(
+      '128.31.0.39',
+      9131,
+      '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31',
+      'server-descriptor 1.0',
+    )
+
+    # checking via the iterator
+
+    expected_error_msg = 'Content conform to being a server descriptor:\nsome malformed stuff'
+
+    descriptors = list(query)
+    self.assertEqual(0, len(descriptors))
+    self.assertEqual(ValueError, type(query.error))
+    self.assertEqual(expected_error_msg, str(query.error))
+
+    # check via the run() method
+
+    self.assertRaises(ValueError, list, query.run())
+
+  @patch('urllib2.urlopen')
+  def test_query_with_timeout(self, urlopen_mock):
+    urlopen_mock.side_effect = socket.timeout('connection timed out')
+
+    query = stem.descriptor.remote.Query(
+      '128.31.0.39',
+      9131,
+      '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31',
+      'server-descriptor 1.0',
+      5,
+    )
+
+    self.assertRaises(socket.timeout, list, query.run())
+    urlopen_mock.assert_called_once_with(
+      'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31',
+      timeout = 5,
+    )
diff --git a/test/unit/descriptor/server_descriptor.py b/test/unit/descriptor/server_descriptor.py
index c679588..50f27f6 100644
--- a/test/unit/descriptor/server_descriptor.py
+++ b/test/unit/descriptor/server_descriptor.py
@@ -213,10 +213,16 @@ class TestServerDescriptor(unittest.TestCase):
 
     desc_text = b"@pepperjack very tasty\n at mushrooms not so much\n"
     desc_text += get_relay_server_descriptor(content = True)
-    desc_text += b"\ntrailing text that should be ignored, ho hum"
+    desc_text += b"\ntrailing text that should be invalid, ho hum"
 
     # running _parse_file should provide an iterator with a single descriptor
     desc_iter = stem.descriptor.server_descriptor._parse_file(io.BytesIO(desc_text))
+    self.assertRaises(ValueError, list, desc_iter)
+
+    desc_text = b"@pepperjack very tasty\n at mushrooms not so much\n"
+    desc_text += get_relay_server_descriptor(content = True)
+    desc_iter = stem.descriptor.server_descriptor._parse_file(io.BytesIO(desc_text))
+
     desc_entries = list(desc_iter)
     self.assertEquals(1, len(desc_entries))
     desc = desc_entries[0]





More information about the tor-commits mailing list