[tor-commits] [stem/master] Controller methods for fetching all descriptors

atagar at torproject.org atagar at torproject.org
Tue Oct 16 16:06:33 UTC 2012


commit c52521efe273e8091ec95554d24ed172e755d29e
Author: Damian Johnson <atagar at torproject.org>
Date:   Tue Oct 16 08:47:03 2012 -0700

    Controller methods for fetching all descriptors
    
    Adding a couple methods to iterate over the server descriptors or router status
    entries. This definitely isn't what I want in the long run - we should iterate
    over the contents as they're being read from the socket rather than reading the
    whole thing into memory.
    
    Meh. Might as well get the functionality that we want now. The fix will be
    transparent for users later anyway.
---
 stem/control.py                        |   43 ++++++++++++++++++++++++++++
 stem/descriptor/networkstatus.py       |   48 ++-----------------------------
 stem/descriptor/router_status_entry.py |   38 +++++++++++++++++++++++++
 test/integ/control/controller.py       |   42 ++++++++++++++++++++++++++++
 4 files changed, 127 insertions(+), 44 deletions(-)

diff --git a/stem/control.py b/stem/control.py
index 6077564..9fae790 100644
--- a/stem/control.py
+++ b/stem/control.py
@@ -52,6 +52,7 @@ from __future__ import with_statement
 
 import time
 import Queue
+import StringIO
 import threading
 
 import stem.response
@@ -679,6 +680,24 @@ class Controller(BaseController):
     desc_content = self.get_info(query)
     return stem.descriptor.server_descriptor.RelayDescriptor(desc_content)
   
+  def get_server_descriptors(self):
+    """
+    Provides an iterator for all of the server descriptors that tor presently
+    knows about.
+    
+    :returns: iterates over :class:`stem.descriptor.server_descriptor.RelayDescriptor` for relays in the tor network
+    
+    :raises: :class:`stem.socket.ControllerError` if unable to query tor
+    """
+    
+    # TODO: We should iterate over the descriptors as they're read from the
+    # socket rather than reading the whole thing into memory.
+    
+    desc_content = self.get_info("desc/all-recent")
+    
+    for desc in stem.descriptor.server_descriptor.parse_file(StringIO.StringIO(desc_content)):
+      yield desc
+  
   def get_network_status(self, relay):
     """
     Provides the router status entry for the relay with the given fingerprint
@@ -704,6 +723,30 @@ class Controller(BaseController):
     desc_content = self.get_info(query)
     return stem.descriptor.router_status_entry.RouterStatusEntryV2(desc_content)
   
+  def get_network_statuses(self):
+    """
+    Provides an iterator for all of the router status entries that tor
+    presently knows about.
+    
+    :returns: iterates over :class:`stem.descriptor.router_status_entry.RouterStatusEntryV2` for relays in the tor network
+    
+    :raises: :class:`stem.socket.ControllerError` if unable to query tor
+    """
+    
+    # TODO: We should iterate over the descriptors as they're read from the
+    # socket rather than reading the whole thing into memory.
+    
+    desc_content = self.get_info("ns/all")
+    
+    desc_iterator = stem.descriptor.router_status_entry.parse_file(
+      StringIO.StringIO(desc_content),
+      True,
+      entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2,
+    )
+    
+    for desc in desc_iterator:
+      yield desc
+  
   def authenticate(self, *args, **kwargs):
     """
     A convenience method to authenticate the controller.
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 21f7ee0..e8a2679 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -206,7 +206,7 @@ def parse_file(document_file, validate = True, is_microdescriptor = False, docum
   else:
     raise ValueError("Document version %i isn't recognized (only able to parse v2 or v3)" % document_version)
   
-  desc_iterator = _get_entries(
+  desc_iterator = stem.descriptor.router_status_entry.parse_file(
     document_file,
     validate,
     entry_class = router_type,
@@ -219,46 +219,6 @@ def parse_file(document_file, validate = True, is_microdescriptor = False, docum
   for desc in desc_iterator:
     yield desc
 
-def _get_entries(document_file, validate, entry_class, entry_keyword, start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
-  """
-  Reads a range of the document_file containing some number of entry_class
-  instances. We deliminate the entry_class entries by the keyword on their
-  first line (entry_keyword). When finished the document is left at the
-  end_position.
-  
-  Either a end_position or section_end_keywords must be provided.
-  
-  :param file document_file: file with network status document content
-  :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
-  :param class entry_class: class to construct instance for
-  :param str entry_keyword: first keyword for the entry instances
-  :param int start_position: start of the section, default is the current position
-  :param int end_position: end of the section
-  :param tuple section_end_keywords: keyword(s) that deliminate the end of the section if no end_position was provided
-  :param tuple extra_args: extra arguments for the entry_class (after the content and validate flag)
-  
-  :returns: iterator over entry_class instances
-  
-  :raises:
-    * ValueError if the contents is malformed and validate is True
-    * IOError if the file can't be read
-  """
-  
-  if start_position is None:
-    start_position = document_file.tell()
-  
-  if end_position is None:
-    if section_end_keywords:
-      stem.descriptor._read_until_keywords(section_end_keywords, document_file, skip = True)
-      end_position = document_file.tell()
-    else:
-      raise ValueError("Either a end_position or section_end_keywords must be provided")
-  
-  document_file.seek(start_position)
-  while document_file.tell() < end_position:
-    desc_content = "".join(stem.descriptor._read_until_keywords(entry_keyword, document_file, ignore_first = True, end_position = end_position))
-    yield entry_class(desc_content, validate, *extra_args)
-
 class NetworkStatusDocument(stem.descriptor.Descriptor):
   """
   Common parent for network status documents.
@@ -325,7 +285,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
     document_file = StringIO.StringIO(raw_content)
     document_content = "".join(stem.descriptor._read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
     
-    self.routers = tuple(_get_entries(
+    self.routers = tuple(stem.descriptor.router_status_entry.parse_file(
       document_file,
       validate,
       entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2,
@@ -489,7 +449,7 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
       else:
         self._unrecognized_lines += value
     
-    self.directory_authorities = tuple(_get_entries(
+    self.directory_authorities = tuple(stem.descriptor.router_status_entry.parse_file(
       document_file,
       validate,
       entry_class = DirectoryAuthority,
@@ -503,7 +463,7 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
     else:
       router_type = stem.descriptor.router_status_entry.RouterStatusEntryMicroV3
     
-    self.routers = tuple(_get_entries(
+    self.routers = tuple(stem.descriptor.router_status_entry.parse_file(
       document_file,
       validate,
       entry_class = router_type,
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index d94b2e8..a46d006 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -22,6 +22,44 @@ import datetime
 import stem.descriptor
 import stem.exit_policy
 
+def parse_file(document_file, validate, entry_class, entry_keyword = "r", start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
+  """
+  Reads a range of the document_file containing some number of entry_class
+  instances. We delimit the entry_class entries by the keyword on their
+  first line (entry_keyword). When finished the document is left at the
+  end_position.
+  
+  Either an end_position or section_end_keywords must be provided.
+  
+  :param file document_file: file with network status document content
+  :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
+  :param class entry_class: class to construct instance for
+  :param str entry_keyword: first keyword for the entry instances
+  :param int start_position: start of the section, default is the current position
+  :param int end_position: end of the section
+  :param tuple section_end_keywords: keyword(s) that delimit the end of the section if no end_position was provided
+  :param tuple extra_args: extra arguments for the entry_class (after the content and validate flag)
+  
+  :returns: iterator over entry_class instances
+  
+  :raises:
+    * ValueError if the contents is malformed and validate is True
+    * IOError if the file can't be read
+  """
+  
+  if start_position is None:
+    start_position = document_file.tell()
+  
+  if end_position is None:
+    if section_end_keywords:
+      stem.descriptor._read_until_keywords(section_end_keywords, document_file, skip = True)
+      end_position = document_file.tell()
+  
+  document_file.seek(start_position)
+  while not end_position or document_file.tell() < end_position:
+    desc_content = "".join(stem.descriptor._read_until_keywords(entry_keyword, document_file, ignore_first = True, end_position = end_position))
+    yield entry_class(desc_content, validate, *extra_args)
+
 class RouterStatusEntry(stem.descriptor.Descriptor):
   """
   Information about an individual router stored within a network status
diff --git a/test/integ/control/controller.py b/test/integ/control/controller.py
index 1482c23..4def79d 100644
--- a/test/integ/control/controller.py
+++ b/test/integ/control/controller.py
@@ -462,6 +462,29 @@ class TestController(unittest.TestCase):
       self.assertEqual(first_descriptor, controller.get_server_descriptor(first_descriptor.fingerprint))
       self.assertEqual(first_descriptor, controller.get_server_descriptor(first_descriptor.nickname))
   
+  def test_get_server_descriptors(self):
+    """
+    Fetches a few descriptors via the get_server_descriptors() method.
+    """
+    
+    runner = test.runner.get_runner()
+    
+    if test.runner.require_control(self): return
+    
+    with runner.get_tor_controller() as controller:
+      count = 0
+      
+      for desc in controller.get_server_descriptors():
+        self.assertTrue(desc.fingerprint != None)
+        self.assertTrue(desc.nickname != None)
+        
+        # We don't want to take the time to read the whole thing. We already
+        # have another test that reads the full cached descriptors (and takes a
+        # while to do so).
+        
+        count += 1
+        if count > 10: break
+  
   def test_get_network_status(self):
     """
     Compares get_network_status() against our cached descriptors.
@@ -501,4 +524,23 @@ class TestController(unittest.TestCase):
       
       self.assertEqual(first_descriptor, controller.get_network_status(first_descriptor.fingerprint))
       self.assertEqual(first_descriptor, controller.get_network_status(first_descriptor.nickname))
+  
+  def test_get_network_statuses(self):
+    """
+    Fetches a few descriptors via the get_network_statuses() method.
+    """
+    
+    runner = test.runner.get_runner()
+    
+    if test.runner.require_control(self): return
+    
+    with runner.get_tor_controller() as controller:
+      count = 0
+      
+      for desc in controller.get_network_statuses():
+        self.assertTrue(desc.fingerprint != None)
+        self.assertTrue(desc.nickname != None)
+        
+        count += 1
+        if count > 10: break
 





More information about the tor-commits mailing list