[tor-commits] [stem/master] Support a DocumentHandler when reading consensus documents

atagar at torproject.org atagar at torproject.org
Sat Aug 17 20:44:27 UTC 2019


commit 20c36088b536022995d9f10475a6386dbd5d4fdb
Author: Damian Johnson <atagar at torproject.org>
Date:   Wed Aug 14 14:04:23 2019 -0700

    Support a DocumentHandler when reading consensus documents
    
    When reading a consensus, our callers need the ability to specify the form in
    which they wish to receive the document (just the document, a document with
    the relays populated, or just the relays).
    
    Unfortunately CollecTor has separate annotations for v3 consensus and vote
    documents, whereas all other classes have at most one. This requires rethinking
    the annotation constants, but for the moment we are moving on to bigger fish.
---
 stem/descriptor/collector.py      | 21 +++++++++++++--------
 stem/descriptor/networkstatus.py  |  2 ++
 test/unit/descriptor/collector.py |  9 ++++++++-
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index 09e94e74..7c2447a2 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -78,10 +78,11 @@ import shutil
 import tempfile
 import time
 
+import stem.descriptor
 import stem.util.connection
 import stem.util.str_tools
 
-from stem.descriptor import Compression, parse_file
+from stem.descriptor import Compression, DocumentHandler
 
 COLLECTOR_URL = 'https://collector.torproject.org/'
 REFRESH_INDEX_RATE = 3600  # get new index if cached copy is an hour old
@@ -178,14 +179,14 @@ def get_microdescriptors(start = None, end = None, cache_to = None, timeout = No
     yield desc
 
 
-def get_consensus(start = None, end = None, cache_to = None, version = 3, microdescriptor = False, timeout = None, retries = 3):
+def get_consensus(start = None, end = None, cache_to = None, document_handler = DocumentHandler.ENTRIES, version = 3, microdescriptor = False, timeout = None, retries = 3):
   """
   Shorthand for
   :func:`~stem.descriptor.collector.CollecTor.get_consensus`
   on our singleton instance.
   """
 
-  for desc in get_instance().get_consensus(start, end, cache_to, version, microdescriptor, timeout, retries):
+  for desc in get_instance().get_consensus(start, end, cache_to, document_handler, version, microdescriptor, timeout, retries):
     yield desc
 
 
@@ -216,7 +217,7 @@ class File(object):
     self._guessed_type = File._guess_descriptor_types(path)
     self._downloaded_to = None  # location we last downloaded to
 
-  def read(self, directory = None, descriptor_type = None, timeout = None, retries = 3):
+  def read(self, directory = None, descriptor_type = None, document_handler = DocumentHandler.ENTRIES, timeout = None, retries = 3):
     """
     Provides descriptors from this archive. Descriptors are downloaded or read
     from disk as follows...
@@ -239,6 +240,8 @@ class File(object):
     :param str descriptor_type: `descriptor type
       <https://metrics.torproject.org/collector.html#data-formats>`_, this is
       guessed if not provided
+    :param stem.descriptor.__init__.DocumentHandler document_handler: method in
+      which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
     :param int timeout: timeout when connection becomes idle, no timeout
       applied if **None**
     :param int retires: maximum attempts to impose
@@ -269,7 +272,7 @@ class File(object):
 
         tmp_directory = tempfile.mkdtemp()
 
-        for desc in self.read(tmp_directory, descriptor_type, timeout, retries):
+        for desc in self.read(tmp_directory, descriptor_type, document_handler, timeout, retries):
           yield desc
 
         shutil.rmtree(tmp_directory)
@@ -281,7 +284,7 @@ class File(object):
     # Archives can contain multiple descriptor types, so parsing everything and
     # filtering to what we're after.
 
-    for desc in parse_file(path):
+    for desc in stem.descriptor.parse_file(path, document_handler = document_handler):
       desc_annotation = type(desc).TYPE_ANNOTATION_NAME
 
       if descriptor_type is None or (desc_annotation and descriptor_type.startswith(desc_annotation)):
@@ -497,7 +500,7 @@ class CollecTor(object):
       for desc in f.read(cache_to, 'microdescriptor', timeout = timeout, retries = retries):
         yield desc
 
-  def get_consensus(self, start = None, end = None, cache_to = None, version = 3, microdescriptor = False, timeout = None, retries = 3):
+  def get_consensus(self, start = None, end = None, cache_to = None, document_handler = DocumentHandler.ENTRIES, version = 3, microdescriptor = False, timeout = None, retries = 3):
     """
     Provides consensus router status entries published during the given time
     range, sorted oldest to newest.
@@ -506,6 +509,8 @@ class CollecTor(object):
     :param datetime.datetime end: time range to end with
     :param str cache_to: directory to cache archives into, if an archive is
       available here it is not downloaded
+    :param stem.descriptor.__init__.DocumentHandler document_handler: method in
+      which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
     :param int version: consensus variant to retrieve (versions 2 or 3)
     :param bool microdescriptor: provides the microdescriptor consensus if
       **True**, standard consensus otherwise
@@ -536,7 +541,7 @@ class CollecTor(object):
     # TODO: document vs router status entries (ie. DocumentType)?
 
     for f in self.files(desc_type, start, end):
-      for desc in f.read(cache_to, desc_type, timeout = timeout, retries = retries):
+      for desc in f.read(cache_to, desc_type, document_handler, timeout = timeout, retries = retries):
         yield desc
 
   def index(self, compression = 'best'):
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index b0589f2a..dd7759f5 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -1032,6 +1032,8 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
      Added the bandwidth_file_digest attributbute.
   """
 
+  TYPE_ANNOTATION_NAME = 'network-status-consensus-3'  # TODO: can also be network-status-vote-3
+
   ATTRIBUTES = {
     'version': (None, _parse_header_network_status_version_line),
     'version_flavor': ('ns', _parse_header_network_status_version_line),
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
index f5bed87e..7d1f0205 100644
--- a/test/unit/descriptor/collector.py
+++ b/test/unit/descriptor/collector.py
@@ -8,7 +8,7 @@ import unittest
 
 import stem.prereq
 
-from stem.descriptor import Compression
+from stem.descriptor import Compression, DocumentHandler
 from stem.descriptor.collector import CollecTor, File
 from test.unit.descriptor import get_resource
 from test.unit.descriptor.data.collector.index import EXAMPLE_INDEX
@@ -255,6 +255,13 @@ class TestCollector(unittest.TestCase):
     self.assertEqual('RouterStatusEntryV3', type(f).__name__)
     self.assertEqual('000A10D43011EA4928A35F610405F92B4433B4DC', f.fingerprint)
 
+    descriptors = list(stem.descriptor.collector.get_consensus(document_handler = DocumentHandler.DOCUMENT))
+    self.assertEqual(2, len(descriptors))
+
+    f = descriptors[0]
+    self.assertEqual('NetworkStatusDocumentV3', type(f).__name__)
+    self.assertEqual(35, len(f.routers))
+
     # this archive shouldn't have any v2 or microdescriptor consensus data
 
     self.assertEqual(0, len(list(stem.descriptor.collector.get_consensus(version = 2))))





More information about the tor-commits mailing list