commit 20c36088b536022995d9f10475a6386dbd5d4fdb Author: Damian Johnson atagar@torproject.org Date: Wed Aug 14 14:04:23 2019 -0700
Support a DocumentHandler when reading consensus documents
When reading a consensus, our callers need the ability to specify the form in which they wish to receive the document (just the document, a document with the relays populated, or just the relays).
Unfortunately, CollecTor has separate annotations for v3 consensus and vote documents, whereas all other classes have at most one. This requires rethinking the annotation constants, but for the moment we are moving on to bigger fish. --- stem/descriptor/collector.py | 21 +++++++++++++-------- stem/descriptor/networkstatus.py | 2 ++ test/unit/descriptor/collector.py | 9 ++++++++- 3 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py index 09e94e74..7c2447a2 100644 --- a/stem/descriptor/collector.py +++ b/stem/descriptor/collector.py @@ -78,10 +78,11 @@ import shutil import tempfile import time
+import stem.descriptor import stem.util.connection import stem.util.str_tools
-from stem.descriptor import Compression, parse_file +from stem.descriptor import Compression, DocumentHandler
COLLECTOR_URL = 'https://collector.torproject.org/' REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old @@ -178,14 +179,14 @@ def get_microdescriptors(start = None, end = None, cache_to = None, timeout = No yield desc
-def get_consensus(start = None, end = None, cache_to = None, version = 3, microdescriptor = False, timeout = None, retries = 3): +def get_consensus(start = None, end = None, cache_to = None, document_handler = DocumentHandler.ENTRIES, version = 3, microdescriptor = False, timeout = None, retries = 3): """ Shorthand for :func:`~stem.descriptor.collector.CollecTor.get_consensus` on our singleton instance. """
- for desc in get_instance().get_consensus(start, end, cache_to, version, microdescriptor, timeout, retries): + for desc in get_instance().get_consensus(start, end, cache_to, document_handler, version, microdescriptor, timeout, retries): yield desc
@@ -216,7 +217,7 @@ class File(object): self._guessed_type = File._guess_descriptor_types(path) self._downloaded_to = None # location we last downloaded to
- def read(self, directory = None, descriptor_type = None, timeout = None, retries = 3): + def read(self, directory = None, descriptor_type = None, document_handler = DocumentHandler.ENTRIES, timeout = None, retries = 3): """ Provides descriptors from this archive. Descriptors are downloaded or read from disk as follows... @@ -239,6 +240,8 @@ class File(object): :param str descriptor_type: `descriptor type https://metrics.torproject.org/collector.html#data-formats`_, this is guessed if not provided + :var stem.descriptor.__init__.DocumentHandler document_handler: method in + which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` :param int timeout: timeout when connection becomes idle, no timeout applied if **None** :param int retires: maximum attempts to impose @@ -269,7 +272,7 @@ class File(object):
tmp_directory = tempfile.mkdtemp()
- for desc in self.read(tmp_directory, descriptor_type, timeout, retries): + for desc in self.read(tmp_directory, descriptor_type, document_handler, timeout, retries): yield desc
shutil.rmtree(tmp_directory) @@ -281,7 +284,7 @@ class File(object): # Archives can contain multiple descriptor types, so parsing everything and # filtering to what we're after.
- for desc in parse_file(path): + for desc in stem.descriptor.parse_file(path, document_handler = document_handler): desc_annotation = type(desc).TYPE_ANNOTATION_NAME
if descriptor_type is None or (desc_annotation and descriptor_type.startswith(desc_annotation)): @@ -497,7 +500,7 @@ class CollecTor(object): for desc in f.read(cache_to, 'microdescriptor', timeout = timeout, retries = retries): yield desc
- def get_consensus(self, start = None, end = None, cache_to = None, version = 3, microdescriptor = False, timeout = None, retries = 3): + def get_consensus(self, start = None, end = None, cache_to = None, document_handler = DocumentHandler.ENTRIES, version = 3, microdescriptor = False, timeout = None, retries = 3): """ Provides consensus router status entries published during the given time range, sorted oldest to newest. @@ -506,6 +509,8 @@ class CollecTor(object): :param datetime.datetime end: time range to end with :param str cache_to: directory to cache archives into, if an archive is available here it is not downloaded + :var stem.descriptor.__init__.DocumentHandler document_handler: method in + which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` :param int version: consensus variant to retrieve (versions 2 or 3) :param bool microdescriptor: provides the microdescriptor consensus if **True**, standard consensus otherwise @@ -536,7 +541,7 @@ class CollecTor(object): # TODO: document vs router status entries (ie. DocumentType)?
for f in self.files(desc_type, start, end): - for desc in f.read(cache_to, desc_type, timeout = timeout, retries = retries): + for desc in f.read(cache_to, desc_type, document_handler, timeout = timeout, retries = retries): yield desc
def index(self, compression = 'best'): diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py index b0589f2a..dd7759f5 100644 --- a/stem/descriptor/networkstatus.py +++ b/stem/descriptor/networkstatus.py @@ -1032,6 +1032,8 @@ class NetworkStatusDocumentV3(NetworkStatusDocument): Added the bandwidth_file_digest attributbute. """
+ TYPE_ANNOTATION_NAME = 'network-status-consensus-3' # TODO: can also be network-status-vote-3 + ATTRIBUTES = { 'version': (None, _parse_header_network_status_version_line), 'version_flavor': ('ns', _parse_header_network_status_version_line), diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py index f5bed87e..7d1f0205 100644 --- a/test/unit/descriptor/collector.py +++ b/test/unit/descriptor/collector.py @@ -8,7 +8,7 @@ import unittest
import stem.prereq
-from stem.descriptor import Compression +from stem.descriptor import Compression, DocumentHandler from stem.descriptor.collector import CollecTor, File from test.unit.descriptor import get_resource from test.unit.descriptor.data.collector.index import EXAMPLE_INDEX @@ -255,6 +255,13 @@ class TestCollector(unittest.TestCase): self.assertEqual('RouterStatusEntryV3', type(f).__name__) self.assertEqual('000A10D43011EA4928A35F610405F92B4433B4DC', f.fingerprint)
+ descriptors = list(stem.descriptor.collector.get_consensus(document_handler = DocumentHandler.DOCUMENT)) + self.assertEqual(2, len(descriptors)) + + f = descriptors[0] + self.assertEqual('NetworkStatusDocumentV3', type(f).__name__) + self.assertEqual(35, len(f.routers)) + # this archive shouldn't have any v2 or microdescriptor consensus data
self.assertEqual(0, len(list(stem.descriptor.collector.get_consensus(version = 2))))
tor-commits@lists.torproject.org