commit 4196e37f0f43d033b23f740c1e3e3d9612ba4655 Author: Damian Johnson atagar@torproject.org Date: Mon Jul 29 19:49:37 2019 -0700
Initial download function
A few rough edges remain, but a copy-paste of the initial pydoc demo now works. --- stem/descriptor/collector.py | 69 +++++++++++++++++++++++++++++++++++--- stem/descriptor/remote.py | 6 +--- test/integ/descriptor/collector.py | 4 +-- 3 files changed, 67 insertions(+), 12 deletions(-)
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py index f76fa225..a78d60c4 100644 --- a/stem/descriptor/collector.py +++ b/stem/descriptor/collector.py @@ -16,7 +16,7 @@ With this you can either download and read directly from CollecTor... import datetime import stem.descriptor.collector
- yesterday = datetime.date.today() - datetime.timedelta(1) + yesterday = datetime.datetime.today() - datetime.timedelta(1)
# provide yesterday's exits
@@ -33,7 +33,7 @@ With this you can either download and read directly from CollecTor... import stem.descriptor import stem.descriptor.collector
- yesterday = datetime.date.today() - datetime.timedelta(1) + yesterday = datetime.datetime.today() - datetime.timedelta(1) path = os.path.expanduser('~/descriptor_cache/server_desc_today')
with open(path, 'wb') as cache_file: @@ -53,6 +53,7 @@ import datetime import json import os import re +import shutil import sys import tempfile import time @@ -70,6 +71,7 @@ except ImportError:
COLLECTOR_URL = 'https://collector.torproject.org/' REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old +SINGLETON_COLLECTOR = None
YEAR_DATE = re.compile('-(\d{4})-(\d{2})\.') SEC_DATE = re.compile('(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})') @@ -113,6 +115,52 @@ COLLECTOR_DESC_TYPES = { }
+def get_instance(): + """ + Provides the singleton :class:`~stem.descriptor.collector.CollecTor` + used for this module's shorthand functions. + + :returns: singleton :class:`~stem.descriptor.collector.CollecTor` instance + """ + + global SINGLETON_COLLECTOR + + if SINGLETON_COLLECTOR is None: + SINGLETON_COLLECTOR = CollecTor() + + return SINGLETON_COLLECTOR + + +def get_server_descriptors(start = None, end = None, cache_to = None, timeout = None, retries = 3): + """ + Provides server descriptors for the given time range, sorted oldest to + newest. + + :param datetime.datetime start: time range to begin with + :param datetime.datetime end: time range to end with + :param str cache_to: directory to cache archives into, if an archive is + available here it is not downloaded + :param int timeout: timeout for downloading each individual archive when the + connection becomes idle, no timeout applied if **None** + :param int retries: maximum attempts to impose on a per-archive basis + + :returns: **iterator** of + :class:`~stem.descriptor.server_descriptor.ServerDescriptor` for the given + time range + + :raises: + * **socket.timeout** if our request timed out + * **urllib2.URLError** for most request failures + + Note that the urllib2 module may fail with other exception types, in + which case we'll pass it along. + """ + + for f in get_instance().files('server-descriptor', start, end): + for desc in f.read(cache_to, timeout = timeout, retries = retries): + yield desc + + def _download(url, timeout, retries): """ Download from the given url. @@ -229,13 +277,24 @@ class File(object): if self._downloaded_to and os.path.exists(self._downloaded_to): directory = os.path.dirname(self._downloaded_to) else: - with tempfile.TemporaryDirectory() as tmp_directory: - return self.read(tmp_directory, descriptor_type, timeout, retries) + # TODO: The following can be replaced with simpler usage of + # tempfile.TemporaryDirectory when we drop python 2.x support. 
+ + tmp_directory = tempfile.mkdtemp() + + for desc in self.read(tmp_directory, descriptor_type, timeout, retries): + yield desc + + shutil.rmtree(tmp_directory) + + return
# TODO: the following will not work if the tar contains multiple types or a type we do not support
path = self.download(directory, True, timeout, retries) - return parse_file(path, descriptor_type) + + for desc in parse_file(path, descriptor_type): + yield desc
def download(self, directory, decompress = True, timeout = None, retries = 3): """ diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py index 251fc26f..af24f624 100644 --- a/stem/descriptor/remote.py +++ b/stem/descriptor/remote.py @@ -164,11 +164,7 @@ DIR_PORT_BLACKLIST = ('tor26', 'Serge') def get_instance(): """ Provides the singleton :class:`~stem.descriptor.remote.DescriptorDownloader` - used for the following functions... - - * :func:`stem.descriptor.remote.get_server_descriptors` - * :func:`stem.descriptor.remote.get_extrainfo_descriptors` - * :func:`stem.descriptor.remote.get_consensus` + used for this module's shorthand functions.
.. versionadded:: 1.5.0
diff --git a/test/integ/descriptor/collector.py b/test/integ/descriptor/collector.py index dbb09d5a..6a0ec5ac 100644 --- a/test/integ/descriptor/collector.py +++ b/test/integ/descriptor/collector.py @@ -35,8 +35,8 @@ class TestCollector(unittest.TestCase): if compression and not compression.available: self.skipTest('(%s unavailable)' % compression)
- collector = CollecTor(compression = compression) - index = collector.index() + collector = CollecTor() + index = collector.index(compression = compression)
self.assertEqual('https://collector.torproject.org', index['path']) self.assertEqual(['archive', 'contrib', 'recent'], [entry['path'] for entry in index['directories']])
tor-commits@lists.torproject.org