tor-commits
Threads by month
- ----- 2025 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
July 2013
- 19 participants
- 1045 discussions
commit 5514b2cfd7d313f429bfe1bb8a13c975f7c3413f
Author: Damian Johnson <atagar(a)torproject.org>
Date: Thu Jul 18 09:59:30 2013 -0700
Implementing a get_consensus() method
Originally this was gonna be a get_network_status(fingerprint) method but
evidently we can't request individual router status entries. Understandable
since signatures are for the whole document but still a bit of a pity from an
API perspective. Oh well.
---
stem/descriptor/__init__.py | 2 +-
stem/descriptor/remote.py | 60 +++++++++++++++++++++++++++++----------
test/integ/descriptor/remote.py | 32 ++++++++++++++++++++-
test/runner.py | 1 +
4 files changed, 78 insertions(+), 17 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index a527329..82f846d 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -132,7 +132,7 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param stem.descriptor.__init__.DocumentHandler document_handler: method in
- which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index a18d991..66e6d85 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -82,6 +82,8 @@ class Query(object):
:var str resource: resource being fetched, such as '/tor/status-vote/current/consensus.z'
:var str descriptor_type: type of descriptors being fetched, see
:func:`~stem.descriptor.__init__.parse_file`
+ :param stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:var list endpoints: (address, dirport) tuples of the authority or mirror
we're querying, this uses authorities if undefined
@@ -101,9 +103,10 @@ class Query(object):
finished
"""
- def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True):
+ def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
self.resource = resource
self.descriptor_type = descriptor_type
+ self.document_handler = document_handler
self.endpoints = endpoints if endpoints else []
self.retries = retries
@@ -220,7 +223,7 @@ class Query(object):
response = io.BytesIO(response.read().strip())
- self._results = stem.descriptor.parse_file(response, self.descriptor_type)
+ self._results = stem.descriptor.parse_file(response, self.descriptor_type, document_handler = self.document_handler)
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
except:
exc = sys.exc_info()[1]
@@ -278,14 +281,14 @@ class DescriptorDownloader(object):
resource = '/tor/server/all'
+ if isinstance(fingerprints, str):
+ fingerprints = [fingerprints]
+
if fingerprints:
- if isinstance(fingerprints, str):
- resource = '/tor/server/fp/%s' % fingerprints
- else:
- if len(fingerprints) > MAX_BATCH_SIZE:
- raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_BATCH_SIZE)
+ if len(fingerprints) > MAX_BATCH_SIZE:
+ raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_BATCH_SIZE)
- resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
return self._query(resource, 'server-descriptor 1.0')
@@ -306,18 +309,44 @@ class DescriptorDownloader(object):
resource = '/tor/extra/all'
+ if isinstance(fingerprints, str):
+ fingerprints = [fingerprints]
+
if fingerprints:
- if isinstance(fingerprints, str):
- resource = '/tor/extra/fp/%s' % fingerprints
- else:
- if len(fingerprints) > MAX_BATCH_SIZE:
- raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_BATCH_SIZE)
+ if len(fingerprints) > MAX_BATCH_SIZE:
+ raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_BATCH_SIZE)
- resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
return self._query(resource, 'extra-info 1.0')
- def _query(self, resource, descriptor_type):
+ def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
+ """
+ Provides the present router status entries.
+
+ :param stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
+ :param str authority_v3ident: fingerprint of the authority key for which
+ to get the consensus, see `'v3ident' in tor's config.c
+ <https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
+ for the values.
+
+ :returns: :class:`~stem.descriptor.remote.Query` for the router status
+ entries
+ """
+
+ resource = '/tor/status-vote/current/consensus'
+
+ if authority_v3ident:
+ resource += '/%s' % authority_v3ident
+
+ return self._query(
+ resource,
+ 'network-status-consensus-3 1.0',
+ document_handler = document_handler,
+ )
+
+ def _query(self, resource, descriptor_type, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
"""
Issues a request for the given resource.
"""
@@ -330,4 +359,5 @@ class DescriptorDownloader(object):
fall_back_to_authority = self.fall_back_to_authority,
timeout = self.timeout,
start = self.start_when_requested,
+ document_handler = document_handler,
)
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index 62348ab..9273b61 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -4,9 +4,10 @@ Integration tests for stem.descriptor.remote.
import unittest
-import stem.descriptor.server_descriptor
import stem.descriptor.extrainfo_descriptor
import stem.descriptor.remote
+import stem.descriptor.router_status_entry
+import stem.descriptor.server_descriptor
import test.runner
# Required to prevent unmarshal error when running this test alone.
@@ -26,6 +27,8 @@ class TestDescriptorReader(unittest.TestCase):
if test.runner.require_online(self):
return
+ elif test.runner.only_run_once(self, "test_using_authorities"):
+ return
queries = []
@@ -51,6 +54,11 @@ class TestDescriptorReader(unittest.TestCase):
Exercises the downloader's get_server_descriptors() method.
"""
+ if test.runner.require_online(self):
+ return
+ elif test.runner.only_run_once(self, "test_get_server_descriptors"):
+ return
+
downloader = stem.descriptor.remote.DescriptorDownloader()
# Fetch a single descriptor and a batch. I'd love to also exercise
@@ -82,6 +90,11 @@ class TestDescriptorReader(unittest.TestCase):
Exercises the downloader's get_extrainfo_descriptors() method.
"""
+ if test.runner.require_online(self):
+ return
+ elif test.runner.only_run_once(self, "test_get_extrainfo_descriptors"):
+ return
+
downloader = stem.descriptor.remote.DescriptorDownloader()
single_query = downloader.get_extrainfo_descriptors('9695DFC35FFEB861329B9F1AB04C46397020CE31')
@@ -101,4 +114,21 @@ class TestDescriptorReader(unittest.TestCase):
self.assertEqual(2, len(list(multiple_query)))
+ def test_get_consensus(self):
+ """
+ Exercises the downloader's get_consensus() method.
+ """
+
+ if test.runner.require_online(self):
+ return
+ elif test.runner.only_run_once(self, "test_get_consensus"):
+ return
+
+ downloader = stem.descriptor.remote.DescriptorDownloader()
+
+ consensus_query = downloader.get_consensus()
+ consensus_query.run()
+ consensus = list(consensus_query)
+ self.assertTrue(len(consensus) > 50)
+ self.assertTrue(isinstance(consensus[0], stem.descriptor.router_status_entry.RouterStatusEntryV3))
diff --git a/test/runner.py b/test/runner.py
index 9bd62f7..a1eda9e 100644
--- a/test/runner.py
+++ b/test/runner.py
@@ -16,6 +16,7 @@ about the tor test instance they're running against.
require_control - skips the test unless tor provides a controller endpoint
require_version - skips the test unless we meet a tor version requirement
require_online - skips unless targets allow for online tests
+ only_run_once - skip the test if it has been run before
exercise_controller - basic sanity check that a controller connection can be used
get_runner - Singleton for fetching our runtime context.
1
0
[stem/master] Adding a query_args parameter to all downloader methods
by atagar@torproject.org 22 Jul '13
by atagar@torproject.org 22 Jul '13
22 Jul '13
commit e95e00dd6431001000a79e2bfc009a7f3abbde80
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 16:31:00 2013 -0700
Adding a query_args parameter to all downloader methods
Simple tweak in case callers want to do something dynamic (such as varying the
timeout with each request).
---
stem/descriptor/remote.py | 34 +++++++++++++++++++++-------------
1 file changed, 21 insertions(+), 13 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 01130f3..a009078 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -406,7 +406,7 @@ class DescriptorDownloader(object):
self._endpoints = list(new_endpoints)
- def get_server_descriptors(self, fingerprints = None):
+ def get_server_descriptors(self, fingerprints = None, **query_args):
"""
Provides the server descriptors with the given fingerprints. If no
fingerprints are provided then this returns all descriptors in the present
@@ -414,6 +414,8 @@ class DescriptorDownloader(object):
:param str,list fingerprints: fingerprint or list of fingerprints to be
retrieved, gets all descriptors if **None**
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the server descriptors
@@ -432,9 +434,9 @@ class DescriptorDownloader(object):
resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
- return self.query(resource)
+ return self.query(resource, **query_args)
- def get_extrainfo_descriptors(self, fingerprints = None):
+ def get_extrainfo_descriptors(self, fingerprints = None, **query_args):
"""
Provides the extrainfo descriptors with the given fingerprints. If no
fingerprints are provided then this returns all descriptors in the present
@@ -442,6 +444,8 @@ class DescriptorDownloader(object):
:param str,list fingerprints: fingerprint or list of fingerprints to be
retrieved, gets all descriptors if **None**
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the extrainfo descriptors
@@ -460,9 +464,9 @@ class DescriptorDownloader(object):
resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
- return self.query(resource)
+ return self.query(resource, **query_args)
- def get_microdescriptors(self, hashes):
+ def get_microdescriptors(self, hashes, **query_args):
"""
Provides the microdescriptors with the given hashes. To get these see the
'microdescriptor_hashes' attribute of
@@ -472,6 +476,8 @@ class DescriptorDownloader(object):
:param str,list hashes: microdescriptor hash or list of hashes to be
retrieved
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the microdescriptors
@@ -485,9 +491,9 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
- return self.query('/tor/micro/d/%s.z' % '-'.join(hashes))
+ return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)
- def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
+ def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None, **query_args):
"""
Provides the present router status entries.
@@ -497,6 +503,8 @@ class DescriptorDownloader(object):
to get the consensus, see `'v3ident' in tor's config.c
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
for the values.
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the router status
entries
@@ -507,14 +515,14 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- return self.query(resource + '.z', document_handler = document_handler)
+ return self.query(resource + '.z', document_handler = document_handler, **query_args)
- def query(self, resource, **kwargs):
+ def query(self, resource, **query_args):
"""
Issues a request for the given resource.
:param str resource: resource being fetched, such as '/tor/server/all.z'
- :param kwargs: additional arguments for the
+ :param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the descriptors
@@ -523,11 +531,11 @@ class DescriptorDownloader(object):
type can't be determined when 'descriptor_type' is **None**
"""
- query_args = dict(self._default_args)
- query_args.update(kwargs)
+ args = dict(self._default_args)
+ args.update(query_args)
return Query(
resource,
endpoints = self._endpoints,
- **query_args
+ **args
)
1
0
commit 6f12acc9e54842371f52203550a6ef2f9cbfb835
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 16:25:58 2013 -0700
Simplifying DescriptorDownloader class
The DescriptorDownloader is a very, very simple class that simply issues
Queries on your behalf. Why should it pretend to be anything else?
Dropping most of its attributes (and with it a lot of copy-and-pasted pydocs).
Also making the query() method public.
---
stem/descriptor/remote.py | 62 ++++++++++++++++++---------------------------
1 file changed, 25 insertions(+), 37 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 777f769..01130f3 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -51,7 +51,8 @@ itself...
|- get_server_descriptors - provides present :class:`~stem.descriptor.stem.descriptor.server_descriptor.ServerDescriptor`
|- get_extrainfo_descriptors - provides present :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
|- get_microdescriptors - provides present :class:`~stem.descriptor.microdescriptor.Microdescriptor`
- +- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
+ |- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
+ +- query - request an arbitrary descriptor resource
.. data:: MAX_DESCRIPTOR_BATCH_SIZE
@@ -365,35 +366,18 @@ class Query(object):
class DescriptorDownloader(object):
"""
- Configurable class through which descriptors can be downloaded. This provides
- caching, retries, and other capabilities to make downloading descriptors easy
- and efficient.
-
- For more advanced use cases you can use the
- :class:`~stem.descriptor.remote.Query` class directly.
+ Configurable class that issues :class:`~stem.descriptor.remote.Query`
+ instances on your behalf.
:param bool use_mirrors: downloads the present consensus and uses the directory
mirrors to fetch future requests, this fails silently if the consensus
cannot be downloaded
-
- :var int retries: number of times to attempt the request if it fails
- :var bool fall_back_to_authority: when retrying request issues the last
- request to a directory authority if **True**
- :var float timeout: duration before we'll time out our request, no timeout is
- applied if **None**
- :var bool start_when_requested: issues requests when our methods are called
- if **True**, otherwise provides non-running
- :class:`~stem.descriptor.remote.Query` instances
- :var bool validate: checks the validity of the descriptor's content if
- **True**, skips these checks otherwise
+ :param default_args: default arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
"""
- def __init__(self, use_mirrors = False, retries = 2, fall_back_to_authority = True, timeout = None, start_when_requested = True, validate = True):
- self.retries = retries
- self.fall_back_to_authority = fall_back_to_authority
- self.timeout = timeout
- self.start_when_requested = start_when_requested
- self.validate = validate
+ def __init__(self, use_mirrors = False, **default_args):
+ self._default_args = default_args
self._endpoints = DIRECTORY_AUTHORITIES.values()
if use_mirrors:
@@ -448,7 +432,7 @@ class DescriptorDownloader(object):
resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
- return self._query(resource)
+ return self.query(resource)
def get_extrainfo_descriptors(self, fingerprints = None):
"""
@@ -476,7 +460,7 @@ class DescriptorDownloader(object):
resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
- return self._query(resource)
+ return self.query(resource)
def get_microdescriptors(self, hashes):
"""
@@ -501,7 +485,7 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
- return self._query('/tor/micro/d/%s.z' % '-'.join(hashes))
+ return self.query('/tor/micro/d/%s.z' % '-'.join(hashes))
def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
"""
@@ -523,23 +507,27 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- return self._query(resource + '.z', document_handler = document_handler)
+ return self.query(resource + '.z', document_handler = document_handler)
- def _query(self, resource, descriptor_type = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ def query(self, resource, **kwargs):
"""
Issues a request for the given resource.
+
+ :param str resource: resource being fetched, such as '/tor/server/all.z'
+ :param kwargs: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
+
+ :returns: :class:`~stem.descriptor.remote.Query` for the descriptors
+
+ :raises: **ValueError** if resource is clearly invalid or the descriptor
+ type can't be determined when 'descriptor_type' is **None**
"""
- log.trace("Retrieving descriptors (resource: %s, type: %s)" % (resource, descriptor_type))
+ query_args = dict(self._default_args)
+ query_args.update(kwargs)
return Query(
resource,
- descriptor_type,
endpoints = self._endpoints,
- retries = self.retries,
- fall_back_to_authority = self.fall_back_to_authority,
- timeout = self.timeout,
- start = self.start_when_requested,
- validate = self.validate,
- document_handler = document_handler,
+ **query_args
)
1
0
commit 25951d4f12a16a1fa6e42ea6b7479959d636139b
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sat Jul 20 10:00:04 2013 -0700
Making descriptor validation optional
Adding a 'validate' argument that can be passed through when parsing
descriptors.
---
stem/descriptor/remote.py | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index e0ca612..83416c8 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -84,8 +84,6 @@ class Query(object):
:var str resource: resource being fetched, such as '/tor/status-vote/current/consensus.z'
:var str descriptor_type: type of descriptors being fetched, see
:func:`~stem.descriptor.__init__.parse_file`
- :param stem.descriptor.__init__.DocumentHandler document_handler: method in
- which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:var list endpoints: (address, dirport) tuples of the authority or mirror
we're querying, this uses authorities if undefined
@@ -103,12 +101,16 @@ class Query(object):
:var float timeout: duration before we'll time out our request
:var float runtime: time our query took, this is **None** if it's not yet
finished
+
+ :var bool validate: checks the validity of the descriptor's content if
+ **True**, skips these checks otherwise
+ :var stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
"""
- def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
self.resource = resource
self.descriptor_type = descriptor_type
- self.document_handler = document_handler
self.endpoints = endpoints if endpoints else []
self.retries = retries
@@ -122,6 +124,9 @@ class Query(object):
self.timeout = timeout
self.runtime = None
+ self.validate = validate
+ self.document_handler = document_handler
+
self._downloader_thread = None
self._downloader_thread_lock = threading.RLock()
@@ -225,7 +230,7 @@ class Query(object):
response = io.BytesIO(response.read().strip())
- self._results = stem.descriptor.parse_file(response, self.descriptor_type, document_handler = self.document_handler)
+ self._results = stem.descriptor.parse_file(response, self.descriptor_type, validate = self.validate, document_handler = self.document_handler)
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
except:
exc = sys.exc_info()[1]
@@ -258,15 +263,18 @@ class DescriptorDownloader(object):
:var bool start_when_requested: issues requests when our methods are called
if **True**, otherwise provides non-running
:class:`~stem.descriptor.remote.Query` instances
+ :var bool validate: checks the validity of the descriptor's content if
+ **True**, skips these checks otherwise
:var bool fall_back_to_authority: when retrying request issues the last
request to a directory authority if **True**
"""
- def __init__(self, retries = 2, use_mirrors = False, fall_back_to_authority = True, timeout = None, start_when_requested = True):
+ def __init__(self, retries = 2, use_mirrors = False, fall_back_to_authority = True, timeout = None, start_when_requested = True, validate = True):
self.retries = retries
self.timeout = timeout
self.start_when_requested = start_when_requested
self.fall_back_to_authority = fall_back_to_authority
+ self.validate = validate
self._endpoints = DIRECTORY_AUTHORITIES.values()
if use_mirrors:
@@ -422,5 +430,6 @@ class DescriptorDownloader(object):
fall_back_to_authority = self.fall_back_to_authority,
timeout = self.timeout,
start = self.start_when_requested,
+ validate = self.validate,
document_handler = document_handler,
)
1
0
commit 60ad230d88b383a259c531649fdba78d0acd6493
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 09:03:39 2013 -0700
Expanding remote descriptor pydocs
Adding documentation and examples for the remote descriptor API. This includes
some minor API tweaks and a missing import statement.
---
stem/descriptor/__init__.py | 1 +
stem/descriptor/remote.py | 243 ++++++++++++++++++++++++++++-----------
test/integ/descriptor/remote.py | 5 +-
test/settings.cfg | 2 +-
test/unit/descriptor/remote.py | 2 +-
5 files changed, 181 insertions(+), 72 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 82f846d..f1fdee4 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -154,6 +154,7 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
import stem.descriptor.server_descriptor
import stem.descriptor.extrainfo_descriptor
import stem.descriptor.networkstatus
+ import stem.descriptor.microdescriptor
# The tor descriptor specifications do not provide a reliable method for
# identifying a descriptor file's type and version so we need to guess
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 83416c8..8ec6f1d 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -2,14 +2,16 @@
# See LICENSE for licensing information
"""
-Utilities for retrieving descriptors from directory authorities and mirrors.
-This is mostly done through the
+Module for remotely retrieving descriptors from directory authorities and
+mirrors. This is most easily done through the
:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues
-:class:`~stem.descriptor.remote.Query` to get descriptor content. For
-example...
+:class:`~stem.descriptor.remote.Query` instances to get you the descriptor
+content. For example...
::
+ from stem.descriptor.remote import DescriptorDownloader
+
downloader = DescriptorDownloader(
cache = '/tmp/descriptor_cache',
use_mirrors = True,
@@ -27,7 +29,7 @@ example...
print
print "Query took %0.2f seconds" % query.runtime
except Exception as exc:
- print "Unable to query the server descriptors: %s" % query.error
+ print "Unable to retrieve the server descriptors: %s" % exc
If you don't care about errors then you can also simply iterate over the query
itself...
@@ -37,6 +39,33 @@ itself...
for desc in downloader.get_server_descriptors():
if desc.exit_policy.is_exiting_allowed():
print " %s (%s)" % (desc.nickname, desc.fingerprint)
+
+::
+
+ Query - Asynchronous request to download tor descriptors
+ |- start - issues the query if it isn't already running
+ +- run - blocks until the request is finished and provides the results
+
+ DescriptorDownloader - Configurable class for issuing queries
+ |- use_directory_mirrors - use directory mirrors to download future descriptors
+ |- get_server_descriptors - provides present :class:`~stem.descriptor.stem.descriptor.server_descriptor.ServerDescriptor`
+ |- get_extrainfo_descriptors - provides present :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
+ |- get_microdescriptors - provides present :class:`~stem.descriptor.microdescriptor.Microdescriptor`
+ +- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
+
+.. data:: MAX_DESCRIPTOR_BATCH_SIZE
+
+ Maximum number of server or extrainfo descriptors that can be requested at a
+ time by their fingerprints.
+
+.. data:: MAX_MICRODESCRIPTOR_BATCH_SIZE
+
+ Maximum number of microdescriptors that can be requested at a time by their
+ hashes.
+
+.. data:: DIRECTORY_AUTHORITIES
+
+ Mapping of directory authority nicknames to their (address, dirport) tuple.
"""
import io
@@ -51,8 +80,8 @@ import stem.descriptor
from stem import Flag
from stem.util import log
-# Tor has a limit on the number of descriptors we can fetch explicitly by their
-# fingerprint or hashes due to the url lenght of squid proxies.
+# Tor has a limited number of descriptors we can fetch explicitly by their
+# fingerprint or hashes due to a limit on the url length by squid proxies.
MAX_DESCRIPTOR_BATCH_SIZE = 96
MAX_MICRODESCRIPTOR_BATCH_SIZE = 92
@@ -75,15 +104,87 @@ DIRECTORY_AUTHORITIES = {
}
+def _guess_descriptor_type(resource):
+ # Attempts to determine the descriptor type based on the resource url. This
+ # raises a ValueError if the resource isn't recognized.
+
+ if resource.startswith('/tor/server/'):
+ return 'server-descriptor 1.0'
+ elif resource.startswith('/tor/extra/'):
+ return 'extra-info 1.0'
+ elif resource.startswith('/tor/micro/'):
+ return 'microdescriptor 1.0'
+ elif resource.startswith('/tor/status-vote/'):
+ return 'network-status-consensus-3 1.0'
+ else:
+ raise ValueError("Unable to determine the descriptor type for '%s'" % resource)
+
+
class Query(object):
"""
Asynchronous request for descriptor content from a directory authority or
- mirror. The caller can block on the response by either calling
- :func:~stem.descriptor.remote.run: or iterating over our descriptor content.
+ mirror. These can either be made through the
+ :class:`~stem.descriptor.remote.DescriptorDownloader` or directly for more
+ advanced usage.
+
+ To block on the response and get results either call
+ :func:`~stem.descriptor.remote.Query.run` or iterate over the Query. The
+ :func:`~stem.descriptor.remote.run` method passes along any errors that
+ arise...
+
+ ::
+
+ from stem.descriptor.remote import Query
+
+ query = Query(
+ '/tor/server/all.z',
+ 'server-descriptor 1.0',
+ timeout = 30,
+ )
+
+ print "Current relays:"
+
+ try:
+ for desc in query.run():
+ print desc.fingerprint
+ except Exception as exc:
+ print "Unable to retrieve the server descriptors: %s" % exc
+
+ ... while iterating fails silently...
+
+ ::
- :var str resource: resource being fetched, such as '/tor/status-vote/current/consensus.z'
- :var str descriptor_type: type of descriptors being fetched, see
- :func:`~stem.descriptor.__init__.parse_file`
+ print "Current relays:"
+
+ for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'):
+ print desc.fingerprint
+
+ In either case exceptions are available via our 'error' attribute.
+
+ Tor provides quite a few different descriptor resources via its directory
+ protocol (see section 4.2 and later of the `dir-spec
+ <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).
+ Commonly useful ones include...
+
+ ======== ===========
+ Resource Description
+ ======== ===========
+ /tor/server/all.z all present server descriptors
+ /tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints
+ /tor/extra/all.z all present extrainfo descriptors
+ /tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints
+ /tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes
+ /tor/status-vote/current/consensus.z present consensus
+ ======== ===========
+
+ The '.z' suffix can be excluded to get a plaintext rather than compressed
+ response. Compression is handled transparently, so this shouldn't matter to
+ the caller.
+
+ :var str resource: resource being fetched, such as '/tor/server/all.z'
+ :var str descriptor_type: type of descriptors being fetched (for options see
+ :func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
+ resource if **None**
:var list endpoints: (address, dirport) tuples of the authority or mirror
we're querying, this uses authorities if undefined
@@ -108,9 +209,16 @@ class Query(object):
which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
"""
- def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ if not resource.startswith('/'):
+ raise ValueError("Resources should start with a '/': %s" % resource)
+
self.resource = resource
- self.descriptor_type = descriptor_type
+
+ if descriptor_type:
+ self.descriptor_type = descriptor_type
+ else:
+ self.descriptor_type = _guess_descriptor_type(resource)
self.endpoints = endpoints if endpoints else []
self.retries = retries
@@ -135,24 +243,6 @@ class Query(object):
if start:
self.start()
- def pick_url(self, use_authority = False):
- """
- Provides a url that can be queried. If we have multiple endpoints then one
- will be picked randomly.
-
- :param bool use_authority: ignores our endpoints and uses a directory
- authority instead
-
- :returns: **str** for the url being queried by this request
- """
-
- if use_authority or not self.endpoints:
- address, dirport = random.choice(DIRECTORY_AUTHORITIES.values())
- else:
- address, dirport = random.choice(self.endpoints)
-
- return "http://%s:%i/%s" % (address, dirport, self.resource.lstrip('/'))
-
def start(self):
"""
Starts downloading the scriptors if we haven't started already.
@@ -160,7 +250,12 @@ class Query(object):
with self._downloader_thread_lock:
if self._downloader_thread is None:
- self._downloader_thread = threading.Thread(target = self._download_descriptors, name="Descriptor Query", args = (self.retries,))
+ self._downloader_thread = threading.Thread(
+ name = "Descriptor Query",
+ target = self._download_descriptors,
+ args = (self.retries,)
+ )
+
self._downloader_thread.setDaemon(True)
self._downloader_thread.start()
@@ -190,38 +285,57 @@ class Query(object):
self._downloader_thread.join()
if self.error:
- if not suppress:
- raise self.error
+ if suppress:
+ return
+
+ raise self.error
else:
if self._results is None:
- if not suppress:
- raise ValueError('BUG: _download_descriptors() finished without either results or an error')
+ if suppress:
+ return
- return
+ raise ValueError('BUG: _download_descriptors() finished without either results or an error')
try:
for desc in self._results:
yield desc
except ValueError as exc:
- # encountered a parsing error
+ self.error = exc # encountered a parsing error
- self.error = exc
+ if suppress:
+ return
- if not suppress:
- raise self.error
+ raise self.error
def __iter__(self):
for desc in self.run(True):
yield desc
+ def _pick_url(self, use_authority = False):
+ """
+ Provides a url that can be queried. If we have multiple endpoints then one
+ will be picked randomly.
+
+ :param bool use_authority: ignores our endpoints and uses a directory
+ authority instead
+
+ :returns: **str** for the url being queried by this request
+ """
+
+ if use_authority or not self.endpoints:
+ address, dirport = random.choice(DIRECTORY_AUTHORITIES.values())
+ else:
+ address, dirport = random.choice(self.endpoints)
+
+ return "http://%s:%i/%s" % (address, dirport, self.resource.lstrip('/'))
+
def _download_descriptors(self, retries):
try:
use_authority = retries == 0 and self.fall_back_to_authority
- self.download_url = self.pick_url(use_authority)
+ self.download_url = self._pick_url(use_authority)
self.start_time = time.time()
response = urllib2.urlopen(self.download_url, timeout = self.timeout)
- self.runtime = time.time() - self.start_time
# This sucks. We need to read the full response into memory before
# processing the content. This is because urllib2 returns a 'file like'
@@ -229,8 +343,9 @@ class Query(object):
# own buffer that does support these.
response = io.BytesIO(response.read().strip())
-
self._results = stem.descriptor.parse_file(response, self.descriptor_type, validate = self.validate, document_handler = self.document_handler)
+
+ self.runtime = time.time() - self.start_time
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
except:
exc = sys.exc_info()[1]
@@ -254,26 +369,27 @@ class DescriptorDownloader(object):
For more advanced use cases you can use the
:class:`~stem.descriptor.remote.Query` class directly.
+ :param bool use_mirrors: downloads the present consensus and uses the directory
+ mirrors to fetch future requests, this fails silently if the consensus
+ cannot be downloaded
+
:var int retries: number of times to attempt the request if it fails
+ :var bool fall_back_to_authority: when retrying request issues the last
+ request to a directory authority if **True**
:var float timeout: duration before we'll time out our request, no timeout is
applied if **None**
- :var bool use_mirrors: downloads the present consensus and uses the directory
- mirrors to fetch future requests, this fails silently if the consensus
- cannot be downloaded
:var bool start_when_requested: issues requests when our methods are called
if **True**, otherwise provides non-running
:class:`~stem.descriptor.remote.Query` instances
:var bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
- :var bool fall_back_to_authority: when retrying request issues the last
- request to a directory authority if **True**
"""
- def __init__(self, retries = 2, use_mirrors = False, fall_back_to_authority = True, timeout = None, start_when_requested = True, validate = True):
+ def __init__(self, use_mirrors = False, retries = 2, fall_back_to_authority = True, timeout = None, start_when_requested = True, validate = True):
self.retries = retries
+ self.fall_back_to_authority = fall_back_to_authority
self.timeout = timeout
self.start_when_requested = start_when_requested
- self.fall_back_to_authority = fall_back_to_authority
self.validate = validate
self._endpoints = DIRECTORY_AUTHORITIES.values()
@@ -282,7 +398,7 @@ class DescriptorDownloader(object):
start_time = time.time()
self.use_directory_mirrors()
log.debug("Retrieve directory mirrors (took %0.2fs)" % (time.time() - start_time))
- except Exception, exc:
+ except Exception as exc:
log.debug("Unable to retrieve directory mirrors: %s" % exc)
def use_directory_mirrors(self):
@@ -295,10 +411,7 @@ class DescriptorDownloader(object):
new_endpoints = set(DIRECTORY_AUTHORITIES.values())
- query = self.get_consensus()
- query.run() # running explicitly so we'll raise errors
-
- for desc in query:
+ for desc in self.get_consensus().run():
if Flag.V2DIR in desc.flags:
new_endpoints.add((desc.address, desc.dir_port))
@@ -332,7 +445,7 @@ class DescriptorDownloader(object):
resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
- return self._query(resource, 'server-descriptor 1.0')
+ return self._query(resource)
def get_extrainfo_descriptors(self, fingerprints = None):
"""
@@ -360,7 +473,7 @@ class DescriptorDownloader(object):
resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
- return self._query(resource, 'extra-info 1.0')
+ return self._query(resource)
def get_microdescriptors(self, hashes):
"""
@@ -385,9 +498,7 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
- resource = '/tor/micro/d/%s' % '-'.join(hashes)
-
- return self._query(resource, 'microdescriptor 1.0')
+ return self._query('/tor/micro/d/%s' % '-'.join(hashes))
def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
"""
@@ -409,13 +520,9 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- return self._query(
- resource,
- 'network-status-consensus-3 1.0',
- document_handler = document_handler,
- )
+ return self._query(resource, document_handler = document_handler)
- def _query(self, resource, descriptor_type, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ def _query(self, resource, descriptor_type = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
"""
Issues a request for the given resource.
"""
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index e9d4e8a..7c45118 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -15,7 +15,8 @@ import test.runner
import stem.descriptor.networkstatus
-class TestDescriptorReader(unittest.TestCase):
+
+class TestDescriptorDownloader(unittest.TestCase):
def test_using_authorities(self):
"""
Fetches a descriptor from each of the directory authorities. This is
@@ -44,7 +45,7 @@ class TestDescriptorReader(unittest.TestCase):
for query in queries:
try:
descriptors = list(query.run())
- except Exception, exc:
+ except Exception as exc:
self.fail("Unable to use %s (%s:%s, %s): %s" % (authority, address, dirport, type(exc), exc))
self.assertEqual(1, len(descriptors))
diff --git a/test/settings.cfg b/test/settings.cfg
index d0fd5b0..80aaf9a 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -187,7 +187,7 @@ test.integ_tests
|test.integ.util.proc.TestProc
|test.integ.util.system.TestSystem
|test.integ.descriptor.reader.TestDescriptorReader
-|test.integ.descriptor.remote.TestDescriptorReader
+|test.integ.descriptor.remote.TestDescriptorDownloader
|test.integ.descriptor.server_descriptor.TestServerDescriptor
|test.integ.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor
|test.integ.descriptor.microdescriptor.TestMicrodescriptor
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index 3aadaac..fb2e3f0 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -71,7 +71,7 @@ class TestDescriptorDownloader(unittest.TestCase):
)
expeced_url = 'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020C…'
- self.assertEqual(expeced_url, query.pick_url())
+ self.assertEqual(expeced_url, query._pick_url())
descriptors = list(query)
self.assertEqual(1, len(descriptors))
1
0
commit 90ec648ed075245f6ed2bae42dd4c6968e15fab1
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 15:57:47 2013 -0700
Support for compressed descriptors
Ummm... well, that was easy. When I was initially looking into tor's directory
responses I had trouble getting valid compressed results. Kostas suspected
that the responses might have been truncated. Oh well, maybe a wget thing. In
either case adding compression support to our module was trivial.
---
stem/descriptor/remote.py | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 8ec6f1d..777f769 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -74,6 +74,7 @@ import sys
import threading
import time
import urllib2
+import zlib
import stem.descriptor
@@ -335,15 +336,17 @@ class Query(object):
self.download_url = self._pick_url(use_authority)
self.start_time = time.time()
- response = urllib2.urlopen(self.download_url, timeout = self.timeout)
+ response = urllib2.urlopen(self.download_url, timeout = self.timeout).read()
- # This sucks. We need to read the full response into memory before
- # processing the content. This is because urllib2 returns a 'file like'
- # object that lacks tell() or seek(). Hence we need to read it into our
- # own buffer that does support these.
+ if self.download_url.endswith('.z'):
+ response = zlib.decompress(response)
- response = io.BytesIO(response.read().strip())
- self._results = stem.descriptor.parse_file(response, self.descriptor_type, validate = self.validate, document_handler = self.document_handler)
+ self._results = stem.descriptor.parse_file(
+ io.BytesIO(response.strip()),
+ self.descriptor_type,
+ validate = self.validate,
+ document_handler = self.document_handler,
+ )
self.runtime = time.time() - self.start_time
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
@@ -434,7 +437,7 @@ class DescriptorDownloader(object):
fingerprints (this is due to a limit on the url length by squid proxies).
"""
- resource = '/tor/server/all'
+ resource = '/tor/server/all.z'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
@@ -443,7 +446,7 @@ class DescriptorDownloader(object):
if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
- resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
return self._query(resource)
@@ -462,7 +465,7 @@ class DescriptorDownloader(object):
fingerprints (this is due to a limit on the url length by squid proxies).
"""
- resource = '/tor/extra/all'
+ resource = '/tor/extra/all.z'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
@@ -471,7 +474,7 @@ class DescriptorDownloader(object):
if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
- resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
return self._query(resource)
@@ -498,7 +501,7 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
- return self._query('/tor/micro/d/%s' % '-'.join(hashes))
+ return self._query('/tor/micro/d/%s.z' % '-'.join(hashes))
def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
"""
@@ -520,7 +523,7 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- return self._query(resource, document_handler = document_handler)
+ return self._query(resource + '.z', document_handler = document_handler)
def _query(self, resource, descriptor_type = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
"""
1
0
commit 7f050ebfa475aeb39f6b1b4a0f7ad53620931646
Merge: b6c23b0 713b046
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 18:39:17 2013 -0700
Remote descriptor fetching module
What started as a tor-dev@ thread has finally culminated in a surprisingly
simple module. This provides the ability to download tor descriptors from
directory mirrors or authorities, and is made up of two classes...
* Query
The core of the module. This is a request to download a given descriptor
resource from a pool of authorities and/or mirrors. This retries failed
requests and can be iterated over to get the descriptor results.
* DescriptorDownloader
A configurable class for issuing Queries. This provides only two things:
simple methods to get common descriptor types, and an easy way of specifying
default Query parameters.
https://lists.torproject.org/pipermail/tor-dev/2013-May/004924.html
https://trac.torproject.org/8257
docs/api.rst | 5 +-
docs/api/descriptor/remote.rst | 5 +
docs/contents.rst | 1 +
stem/control.py | 4 +-
stem/descriptor/__init__.py | 21 +-
stem/descriptor/extrainfo_descriptor.py | 18 +-
stem/descriptor/microdescriptor.py | 13 +-
stem/descriptor/networkstatus.py | 71 +++-
stem/descriptor/remote.py | 581 +++++++++++++++++++++++++++++
stem/descriptor/router_status_entry.py | 16 +-
stem/descriptor/server_descriptor.py | 29 +-
test/integ/descriptor/remote.py | 210 +++++++++++
test/runner.py | 1 +
test/settings.cfg | 4 +-
test/unit/descriptor/remote.py | 125 +++++++
test/unit/descriptor/server_descriptor.py | 8 +-
16 files changed, 1056 insertions(+), 56 deletions(-)
1
0
22 Jul '13
commit 713b04632e6ca1120c6d5c2661206fd84c01904b
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 18:37:48 2013 -0700
Adding remote descriptor sphinx docs to site
Tidying up our pydocs and including it in our site.
---
docs/api.rst | 5 +++-
docs/api/descriptor/remote.rst | 5 ++++
docs/contents.rst | 1 +
stem/descriptor/remote.py | 62 +++++++++++++++++++++-------------------
4 files changed, 43 insertions(+), 30 deletions(-)
diff --git a/docs/api.rst b/docs/api.rst
index 92eb200..b6a5d8f 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -22,7 +22,9 @@ Descriptors
To read descriptors from disk use :func:`~stem.descriptor.__init__.parse_file` for
individual files and `stem.descriptor.reader
-<api/descriptor/reader.html>`_ for batches.
+<api/descriptor/reader.html>`_ for batches. You can also use
+`stem.descriptor.remote <api/descriptor/remote.html>`_ to download descriptors
+remotely like tor does.
* **Classes**
@@ -34,6 +36,7 @@ individual files and `stem.descriptor.reader
* `stem.descriptor.router_status_entry <api/descriptor/router_status_entry.html>`_ - Relay entries within a network status document.
* `stem.descriptor.reader <api/descriptor/reader.html>`_ - Reads and parses descriptor files from disk.
+* `stem.descriptor.remote <api/descriptor/remote.html>`_ - Downloads descriptors from directory mirrors and authorities.
* `stem.descriptor.export <api/descriptor/export.html>`_ - Exports descriptors to other formats.
Utilities
diff --git a/docs/api/descriptor/remote.rst b/docs/api/descriptor/remote.rst
new file mode 100644
index 0000000..eb0d30a
--- /dev/null
+++ b/docs/api/descriptor/remote.rst
@@ -0,0 +1,5 @@
+Descriptor Remote
+=================
+
+.. automodule:: stem.descriptor.remote
+
diff --git a/docs/contents.rst b/docs/contents.rst
index 834ce0a..7c9720b 100644
--- a/docs/contents.rst
+++ b/docs/contents.rst
@@ -35,6 +35,7 @@ Contents
api/descriptor/export
api/descriptor/reader
+ api/descriptor/remote
api/util/conf
api/util/connection
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 0aabd74..da1df63 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -13,8 +13,8 @@ content. For example...
from stem.descriptor.remote import DescriptorDownloader
downloader = DescriptorDownloader(
- cache = '/tmp/descriptor_cache',
use_mirrors = True,
+ timeout = 10,
)
query = downloader.get_server_descriptors()
@@ -48,19 +48,19 @@ itself...
DescriptorDownloader - Configurable class for issuing queries
|- use_directory_mirrors - use directory mirrors to download future descriptors
- |- get_server_descriptors - provides present :class:`~stem.descriptor.stem.descriptor.server_descriptor.ServerDescriptor`
- |- get_extrainfo_descriptors - provides present :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
- |- get_microdescriptors - provides present :class:`~stem.descriptor.microdescriptor.Microdescriptor`
- |- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
- |- get_key_certificates - provides present :class:`~stem.descriptor.networkstatus.KeyCertificate`
+ |- get_server_descriptors - provides present server descriptors
+ |- get_extrainfo_descriptors - provides present extrainfo descriptors
+ |- get_microdescriptors - provides present microdescriptors
+ |- get_consensus - provides the present consensus or router status entries
+ |- get_key_certificates - provides present authority key certificates
+- query - request an arbitrary descriptor resource
-.. data:: MAX_DESCRIPTOR_BATCH_SIZE
+.. data:: MAX_FINGERPRINTS
- Maximum number of server or extrainfo descriptors that can requested at a
- time by their fingerprints.
+ Maximum number of descriptors that can requested at a time by their
+ fingerprints.
-.. data:: MAX_MICRODESCRIPTOR_BATCH_SIZE
+.. data:: MAX_MICRODESCRIPTOR_HASHES
Maximum number of microdescriptors that can requested at a time by their
hashes.
@@ -86,8 +86,8 @@ from stem.util import log
# Tor has a limited number of descriptors we can fetch explicitly by their
# fingerprint or hashes due to a limit on the url length by squid proxies.
-MAX_DESCRIPTOR_BATCH_SIZE = 96
-MAX_MICRODESCRIPTOR_BATCH_SIZE = 92
+MAX_FINGERPRINTS = 96
+MAX_MICRODESCRIPTOR_HASHES = 92
# Tor directory authorities as of commit f631b73 (7/4/13). This should only
# include authorities with 'v3ident':
@@ -134,7 +134,7 @@ class Query(object):
To block on the response and get results either call
:func:`~stem.descriptor.remote.Query.run` or iterate over the Query. The
- :func:`~stem.descriptor.remote.run` method pass along any errors that
+ :func:`~stem.descriptor.remote.Query.run` method pass along any errors that
arise...
::
@@ -143,7 +143,7 @@ class Query(object):
query = Query(
'/tor/server/all.z',
- 'server-descriptor 1.0',
+ descriptor_type = 'server-descriptor 1.0',
timeout = 30,
)
@@ -171,16 +171,18 @@ class Query(object):
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).
Commonly useful ones include...
- ======== ===========
+ ===================================== ===========
Resource Description
- ======== ===========
+ ===================================== ===========
/tor/server/all.z all present server descriptors
/tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints
/tor/extra/all.z all present extrainfo descriptors
/tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints
/tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes
/tor/status-vote/current/consensus.z present consensus
- ======== ===========
+ /tor/keys/all.z key certificates for the authorities
+ /tor/keys/fp/<v3ident1>+<v3ident2>.z key certificates for specific authorities
+ ===================================== ===========
The '.z' suffix can be excluded to get a plaintext rather than compressed
response. Compression is handled transparently, so this shouldn't matter to
@@ -211,7 +213,9 @@ class Query(object):
:var bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:var stem.descriptor.__init__.DocumentHandler document_handler: method in
- which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
+ which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
+
+ :param bool start: start making the request when constructed (default is **True**)
"""
def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
@@ -432,8 +436,8 @@ class DescriptorDownloader(object):
fingerprints = [fingerprints]
if fingerprints:
- if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
- raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
+ if len(fingerprints) > MAX_FINGERPRINTS:
+ raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)
resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
@@ -462,8 +466,8 @@ class DescriptorDownloader(object):
fingerprints = [fingerprints]
if fingerprints:
- if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
- raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
+ if len(fingerprints) > MAX_FINGERPRINTS:
+ raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)
resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
@@ -491,21 +495,21 @@ class DescriptorDownloader(object):
if isinstance(hashes, str):
hashes = [hashes]
- if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
- raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
+ if len(hashes) > MAX_MICRODESCRIPTOR_HASHES:
+ raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_HASHES)
return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)
- def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None, **query_args):
+ def get_consensus(self, authority_v3ident = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **query_args):
"""
Provides the present router status entries.
- :param stem.descriptor.__init__.DocumentHandler document_handler: method in
- which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
:param str authority_v3ident: fingerprint of the authority key for which
to get the consensus, see `'v3ident' in tor's config.c
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
for the values.
+ :param stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
@@ -546,8 +550,8 @@ class DescriptorDownloader(object):
authority_v3idents = [authority_v3idents]
if authority_v3idents:
- if len(authority_v3idents) > MAX_DESCRIPTOR_BATCH_SIZE:
- raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
+ if len(authority_v3idents) > MAX_FINGERPRINTS:
+ raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_FINGERPRINTS)
resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)
1
0
commit d5b3ec93f44de01b21b27264e761fe8f09ec8012
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 10:45:56 2013 -0700
Fixing "cannot unmarshal code" errors
Damnit python, your import scheme is stupidly confusing.
The descriptor's __init__ module has a circular dependency with its contents.
This is because the parse_file() function calls the constituent modules, while
those modules need the Descriptor class from __init__.
So far so good. Only trouble is that python's support for circular dependencies
sucks. To address this I did lazy imports in __init__, so we imported within
the parse_file() function.
On the surface this seemed to work. All the tests certainly passed. The trouble
is that this style of python import is buggy as hell. Turns out that lazy
imports leave the module in question in an unexecutable state so this *only*
works if you've also imported the module another time during the interpreter
execution. Our tests did this, hence passing tests.
I first encountered "cannot unmarshal code" while writing the remote descriptor
tests (both unit and integ). I was content to hack around this with superfluous
import statements while this only manifested within the tests, but now I'm
seeing it during general usage too...
>>> from stem.descriptor.remote import DescriptorDownloader
>>> list(DescriptorDownloader().get_microdescriptors('jzcx+1fHsi47Tu+vQIcyItgn4lKs6aKnFshQ0lZ2JTg'))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "stem/descriptor/remote.py", line 311, in __iter__
for desc in self.run(True):
File "stem/descriptor/remote.py", line 300, in run
for desc in self._results:
File "stem/descriptor/__init__.py", line 154, in parse_file
import stem.descriptor.server_descriptor
RuntimeError: cannot unmarshal code objects in restricted execution mode
Joy. After much head scratching and forum reading it sounds like there's
something magical about 'from' imports so switching the descriptor modules to
that, and moving the __init__ imports to the end. I'm not entirely clear on the
magic going on here, but it's eliminated the errors.
---
stem/descriptor/__init__.py | 15 ++++++------
stem/descriptor/extrainfo_descriptor.py | 18 +++++++++-----
stem/descriptor/microdescriptor.py | 13 ++++++----
stem/descriptor/networkstatus.py | 40 ++++++++++++++++++-------------
stem/descriptor/router_status_entry.py | 16 +++++++++----
stem/descriptor/server_descriptor.py | 26 ++++++++++++--------
test/integ/descriptor/remote.py | 4 ----
test/unit/descriptor/remote.py | 7 ------
8 files changed, 78 insertions(+), 61 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index f1fdee4..e3b5a8b 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -151,11 +151,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
return
- import stem.descriptor.server_descriptor
- import stem.descriptor.extrainfo_descriptor
- import stem.descriptor.networkstatus
- import stem.descriptor.microdescriptor
-
# The tor descriptor specifications do not provide a reliable method for
# identifying a descriptor file's type and version so we need to guess
# based on its filename. Metrics descriptors, however, can be identified
@@ -217,9 +212,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler):
# Parses descriptor files from metrics, yielding individual descriptors. This
# throws a TypeError if the descriptor_type or version isn't recognized.
- import stem.descriptor.server_descriptor
- import stem.descriptor.extrainfo_descriptor
- import stem.descriptor.networkstatus
if descriptor_type == "server-descriptor" and major_version == 1:
for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate):
@@ -541,3 +533,10 @@ def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
return entries, extra_entries
else:
return entries
+
+# importing at the end to avoid circular dependencies on our Descriptor class
+
+import stem.descriptor.server_descriptor
+import stem.descriptor.extrainfo_descriptor
+import stem.descriptor.networkstatus
+import stem.descriptor.microdescriptor
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index e9aea30..fac0991 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -72,11 +72,17 @@ import datetime
import hashlib
import re
-import stem.descriptor
import stem.util.connection
import stem.util.enum
import stem.util.str_tools
+from stem.descriptor import (
+ PGP_BLOCK_END,
+ Descriptor,
+ _read_until_keywords,
+ _get_descriptor_components,
+)
+
# known statuses for dirreq-v2-resp and dirreq-v3-resp...
DirResponse = stem.util.enum.Enum(
("OK", "ok"),
@@ -156,11 +162,11 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
"""
while True:
- extrainfo_content = stem.descriptor._read_until_keywords("router-signature", descriptor_file)
+ extrainfo_content = _read_until_keywords("router-signature", descriptor_file)
# we've reached the 'router-signature', now include the pgp style block
- block_end_prefix = stem.descriptor.PGP_BLOCK_END.split(' ', 1)[0]
- extrainfo_content += stem.descriptor._read_until_keywords(block_end_prefix, descriptor_file, True)
+ block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+ extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if extrainfo_content:
if is_bridge:
@@ -205,7 +211,7 @@ def _parse_timestamp_and_interval(keyword, content):
raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
-class ExtraInfoDescriptor(stem.descriptor.Descriptor):
+class ExtraInfoDescriptor(Descriptor):
"""
Extra-info descriptor document.
@@ -400,7 +406,7 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
self._unrecognized_lines = []
- entries = stem.descriptor._get_descriptor_components(raw_contents, validate)
+ entries = _get_descriptor_components(raw_contents, validate)
if validate:
for keyword in self._required_fields():
diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index 499e170..5834e18 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -66,10 +66,15 @@ Doing the same is trivial with server descriptors...
import hashlib
-import stem.descriptor
import stem.descriptor.router_status_entry
import stem.exit_policy
+from stem.descriptor import (
+ Descriptor,
+ _get_descriptor_components,
+ _read_until_keywords,
+)
+
REQUIRED_FIELDS = (
"onion-key",
)
@@ -99,7 +104,7 @@ def _parse_file(descriptor_file, validate = True):
"""
while True:
- annotations = stem.descriptor._read_until_keywords("onion-key", descriptor_file)
+ annotations = _read_until_keywords("onion-key", descriptor_file)
# read until we reach an annotation or onion-key line
descriptor_lines = []
@@ -136,7 +141,7 @@ def _parse_file(descriptor_file, validate = True):
break # done parsing descriptors
-class Microdescriptor(stem.descriptor.Descriptor):
+class Microdescriptor(Descriptor):
"""
Microdescriptor (`descriptor specification
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)
@@ -174,7 +179,7 @@ class Microdescriptor(stem.descriptor.Descriptor):
self._annotation_lines = annotations if annotations else []
self._annotation_dict = None # cached breakdown of key/value mappings
- entries = stem.descriptor._get_descriptor_components(raw_contents, validate)
+ entries = _get_descriptor_components(raw_contents, validate)
self._parse(entries, validate)
if validate:
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index f65c7dc..baf7f0a 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -49,12 +49,18 @@ For more information see :func:`~stem.descriptor.__init__.DocumentHandler`...
import datetime
import io
-import stem.descriptor
import stem.descriptor.router_status_entry
import stem.util.str_tools
import stem.util.tor_tools
import stem.version
+from stem.descriptor import (
+ Descriptor,
+ DocumentHandler,
+ _get_descriptor_components,
+ _read_until_keywords,
+)
+
# Version 2 network status document fields, tuples of the form...
# (keyword, is_mandatory)
@@ -138,7 +144,7 @@ KEY_CERTIFICATE_PARAMS = (
)
-def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = DocumentHandler.ENTRIES):
"""
Parses a network status and iterates over the RouterStatusEntry in it. The
document that these instances reference have an empty 'routers' attribute to
@@ -180,24 +186,24 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
else:
raise ValueError("Document type %i isn't recognized (only able to parse v2, v3, and bridge)" % document_type)
- if document_handler == stem.descriptor.DocumentHandler.DOCUMENT:
+ if document_handler == DocumentHandler.DOCUMENT:
yield document_type(document_file.read(), validate)
return
# getting the document without the routers section
- header = stem.descriptor._read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
+ header = _read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
routers_start = document_file.tell()
- stem.descriptor._read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
+ _read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
routers_end = document_file.tell()
footer = document_file.readlines()
document_content = bytes.join(b"", header + footer)
- if document_handler == stem.descriptor.DocumentHandler.BARE_DOCUMENT:
+ if document_handler == DocumentHandler.BARE_DOCUMENT:
yield document_type(document_content, validate)
- elif document_handler == stem.descriptor.DocumentHandler.ENTRIES:
+ elif document_handler == DocumentHandler.ENTRIES:
desc_iterator = stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
@@ -214,7 +220,7 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
raise ValueError("Unrecognized document_handler: %s" % document_handler)
-class NetworkStatusDocument(stem.descriptor.Descriptor):
+class NetworkStatusDocument(Descriptor):
"""
Common parent for network status documents.
"""
@@ -281,7 +287,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
# deprecated descriptor type - patches welcome if you want those checks.
document_file = io.BytesIO(raw_content)
- document_content = bytes.join(b"", stem.descriptor._read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
+ document_content = bytes.join(b"", _read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
router_iter = stem.descriptor.router_status_entry._parse_file(
document_file,
@@ -297,7 +303,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
document_content += b"\n" + document_file.read()
document_content = stem.util.str_tools._to_unicode(document_content)
- entries = stem.descriptor._get_descriptor_components(document_content, validate)
+ entries = _get_descriptor_components(document_content, validate)
if validate:
self._check_constraints(entries)
@@ -556,9 +562,9 @@ class _DocumentHeader(object):
self._unrecognized_lines = []
- content = bytes.join(b"", stem.descriptor._read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
+ content = bytes.join(b"", _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
content = stem.util.str_tools._to_unicode(content)
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
self._parse(entries, validate)
# doing this validation afterward so we know our 'is_consensus' and
@@ -792,7 +798,7 @@ class _DocumentFooter(object):
if not content:
return # footer is optional and there's nothing to parse
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
self._parse(entries, validate, header)
if validate:
@@ -948,7 +954,7 @@ def _parse_int_mappings(keyword, value, validate):
return results
-class DirectoryAuthority(stem.descriptor.Descriptor):
+class DirectoryAuthority(Descriptor):
"""
Directory authority information obtained from a v3 network status document.
@@ -1034,7 +1040,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor):
else:
key_cert_content = None
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
if validate and 'dir-source' != entries.keys()[0]:
raise ValueError("Authority entries are expected to start with a 'dir-source' line:\n%s" % (content))
@@ -1168,7 +1174,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor):
return self._compare(other, lambda s, o: s <= o)
-class KeyCertificate(stem.descriptor.Descriptor):
+class KeyCertificate(Descriptor):
"""
Directory key certificate for a v3 network status document.
@@ -1216,7 +1222,7 @@ class KeyCertificate(stem.descriptor.Descriptor):
:raises: **ValueError** if a validity check fails
"""
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
if validate:
if 'dir-key-certificate-version' != entries.keys()[0]:
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index 076baab..80b6623 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -23,10 +23,16 @@ import base64
import binascii
import datetime
-import stem.descriptor
import stem.exit_policy
import stem.util.str_tools
+from stem.descriptor import (
+ KEYWORD_LINE,
+ Descriptor,
+ _get_descriptor_components,
+ _read_until_keywords,
+)
+
def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
"""
@@ -64,7 +70,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start
# check if we're starting at the end of the section (ie, there's no entries to read)
if section_end_keywords:
first_keyword = None
- line_match = stem.descriptor.KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
+ line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
if line_match:
first_keyword = line_match.groups()[0]
@@ -75,7 +81,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start
return
while end_position is None or document_file.tell() < end_position:
- desc_lines, ending_keyword = stem.descriptor._read_until_keywords(
+ desc_lines, ending_keyword = _read_until_keywords(
(entry_keyword,) + section_end_keywords,
document_file,
ignore_first = True,
@@ -95,7 +101,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start
break
-class RouterStatusEntry(stem.descriptor.Descriptor):
+class RouterStatusEntry(Descriptor):
"""
Information about an individual router stored within a network status
document. This is the common parent for concrete status entry types.
@@ -147,7 +153,7 @@ class RouterStatusEntry(stem.descriptor.Descriptor):
self._unrecognized_lines = []
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
if validate:
self._check_constraints(entries)
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index d23f41e..39d4645 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -34,7 +34,6 @@ import datetime
import hashlib
import re
-import stem.descriptor
import stem.descriptor.extrainfo_descriptor
import stem.exit_policy
import stem.prereq
@@ -45,6 +44,14 @@ import stem.version
from stem.util import log
+from stem.descriptor import (
+ PGP_BLOCK_END,
+ Descriptor,
+ _get_bytes_field,
+ _get_descriptor_components,
+ _read_until_keywords,
+)
+
# relay descriptors must have exactly one of the following
REQUIRED_FIELDS = (
"router",
@@ -118,12 +125,12 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
# to the caller).
while True:
- annotations = stem.descriptor._read_until_keywords("router", descriptor_file)
- descriptor_content = stem.descriptor._read_until_keywords("router-signature", descriptor_file)
+ annotations = _read_until_keywords("router", descriptor_file)
+ descriptor_content = _read_until_keywords("router-signature", descriptor_file)
# we've reached the 'router-signature', now include the pgp style block
- block_end_prefix = stem.descriptor.PGP_BLOCK_END.split(' ', 1)[0]
- descriptor_content += stem.descriptor._read_until_keywords(block_end_prefix, descriptor_file, True)
+ block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+ descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if descriptor_content:
# strip newlines from annotations
@@ -142,7 +149,7 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
break # done parsing descriptors
-class ServerDescriptor(stem.descriptor.Descriptor):
+class ServerDescriptor(Descriptor):
"""
Common parent for server descriptors.
@@ -216,8 +223,8 @@ class ServerDescriptor(stem.descriptor.Descriptor):
# Only a few things can be arbitrary bytes according to the dir-spec, so
# parsing them separately.
- self.platform = stem.descriptor._get_bytes_field("platform", raw_contents)
- self.contact = stem.descriptor._get_bytes_field("contact", raw_contents)
+ self.platform = _get_bytes_field("platform", raw_contents)
+ self.contact = _get_bytes_field("contact", raw_contents)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
@@ -272,8 +279,7 @@ class ServerDescriptor(stem.descriptor.Descriptor):
# influences the resulting exit policy, but for everything else the order
# does not matter so breaking it into key / value pairs.
- entries, policy = \
- stem.descriptor._get_descriptor_components(raw_contents, validate, ("accept", "reject"))
+ entries, policy = _get_descriptor_components(raw_contents, validate, ("accept", "reject"))
self.exit_policy = stem.exit_policy.ExitPolicy(*policy)
self._parse(entries, validate)
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index 7c45118..b3c549d 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -11,10 +11,6 @@ import stem.descriptor.router_status_entry
import stem.descriptor.server_descriptor
import test.runner
-# Required to prevent unmarshal error when running this test alone.
-
-import stem.descriptor.networkstatus
-
class TestDescriptorDownloader(unittest.TestCase):
def test_using_authorities(self):
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index fb2e3f0..3ea303b 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -10,13 +10,6 @@ import stem.descriptor.remote
from mock import patch
-# The following isn't used by this directly, but we're still importing it due
-# to a screwy aspect of how mock works. If patched() results in an import that
-# we haven't done before then we can fail with a RuntimeError. In practice this
-# just arises if we run this unit test on its own.
-
-import stem.descriptor.networkstatus
-
# Output from requesting moria1's descriptor from itself...
# % curl http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020C…
1
0
22 Jul '13
commit 4d122b1a417fe318f0bb3d169688d25f3e749754
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 17:10:26 2013 -0700
Implementing a get_key_certificates() method
Method for fetching key certificates for the authorities. This included a
little work so parse_file() could return multiple certificates when they're
concatenated together.
---
stem/descriptor/__init__.py | 3 ++-
stem/descriptor/networkstatus.py | 31 +++++++++++++++++++++++++++++++
stem/descriptor/remote.py | 36 ++++++++++++++++++++++++++++++++++++
test/integ/descriptor/remote.py | 30 ++++++++++++++++++++++++++++++
4 files changed, 99 insertions(+), 1 deletion(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index e3b5a8b..14b29d1 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -237,7 +237,8 @@ def _parse_metrics_file(descriptor_type, major_version, minor_version, descripto
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler):
yield desc
elif descriptor_type == "dir-key-certificate-3" and major_version == 1:
- yield stem.descriptor.networkstatus.KeyCertificate(descriptor_file.read(), validate = validate)
+ for desc in stem.descriptor.networkstatus._parse_file_key_certs(descriptor_file, validate = validate):
+ yield desc
elif descriptor_type in ("network-status-consensus-3", "network-status-vote-3") and major_version == 1:
document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index baf7f0a..ec21304 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -55,6 +55,7 @@ import stem.util.tor_tools
import stem.version
from stem.descriptor import (
+ PGP_BLOCK_END,
Descriptor,
DocumentHandler,
_get_descriptor_components,
@@ -220,6 +221,36 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
raise ValueError("Unrecognized document_handler: %s" % document_handler)
+def _parse_file_key_certs(certificate_file, validate = True):
+ """
+ Parses a file containing one or more authority key certificates.
+
+ :param file certificate_file: file with key certificates
+ :param bool validate: checks the validity of the certificate's contents if
+ **True**, skips these checks otherwise
+
+ :returns: iterator for :class:`stem.descriptor.networkstatus.KeyCertificate`
+ instances in the file
+
+ :raises:
+ * **ValueError** if the key certificate content is invalid and validate is
+ **True**
+ * **IOError** if the file can't be read
+ """
+
+ while True:
+ keycert_content = _read_until_keywords("dir-key-certification", certificate_file)
+
+ # we've reached the 'dir-key-certification', now include the pgp style block
+ block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+ keycert_content += _read_until_keywords(block_end_prefix, certificate_file, True)
+
+ if keycert_content:
+ yield stem.descriptor.networkstatus.KeyCertificate(bytes.join(b"", keycert_content), validate = validate)
+ else:
+ break # done parsing file
+
+
class NetworkStatusDocument(Descriptor):
"""
Common parent for network status documents.
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index a009078..0aabd74 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -52,6 +52,7 @@ itself...
|- get_extrainfo_descriptors - provides present :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
|- get_microdescriptors - provides present :class:`~stem.descriptor.microdescriptor.Microdescriptor`
|- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
+ |- get_key_certificates - provides present :class:`~stem.descriptor.networkstatus.KeyCertificate`
+- query - request an arbitrary descriptor resource
.. data:: MAX_DESCRIPTOR_BATCH_SIZE
@@ -118,6 +119,8 @@ def _guess_descriptor_type(resource):
return 'microdescriptor 1.0'
elif resource.startswith('/tor/status-vote/'):
return 'network-status-consensus-3 1.0'
+ elif resource.startswith('/tor/keys/'):
+ return 'dir-key-certificate-3 1.0'
else:
raise ValueError("Unable to determine the descriptor type for '%s'" % resource)
@@ -517,6 +520,39 @@ class DescriptorDownloader(object):
return self.query(resource + '.z', document_handler = document_handler, **query_args)
+ def get_key_certificates(self, authority_v3idents = None, **query_args):
+ """
+ Provides the key certificates for authorities with the given fingerprints.
+ If no fingerprints are provided then this returns all present key
+ certificates.
+
+ :param str,list authority_v3idents: fingerprint or list of fingerprints of the
+ authority keys, see `'v3ident' in tor's config.c
+ <https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
+ for the values.
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
+
+ :returns: :class:`~stem.descriptor.remote.Query` for the key certificates
+
+ :raises: **ValueError** if we request more than 96 key certificates by
+ their identity fingerprints (this is due to a limit on the url length by
+ squid proxies).
+ """
+
+ resource = '/tor/keys/all.z'
+
+ if isinstance(authority_v3idents, str):
+ authority_v3idents = [authority_v3idents]
+
+ if authority_v3idents:
+ if len(authority_v3idents) > MAX_DESCRIPTOR_BATCH_SIZE:
+ raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
+
+ resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)
+
+ return self.query(resource, **query_args)
+
def query(self, resource, **query_args):
"""
Issues a request for the given resource.
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index b3c549d..d11a4ae 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -6,6 +6,7 @@ import unittest
import stem.descriptor.extrainfo_descriptor
import stem.descriptor.microdescriptor
+import stem.descriptor.networkstatus
import stem.descriptor.remote
import stem.descriptor.router_status_entry
import stem.descriptor.server_descriptor
@@ -178,3 +179,32 @@ class TestDescriptorDownloader(unittest.TestCase):
consensus = list(consensus_query)
self.assertTrue(len(consensus) > 50)
self.assertTrue(isinstance(consensus[0], stem.descriptor.router_status_entry.RouterStatusEntryV3))
+
+ def test_get_key_certificates(self):
+ """
+ Exercises the downloader's get_key_certificates() method.
+ """
+
+ if test.runner.require_online(self):
+ return
+ elif test.runner.only_run_once(self, "test_get_key_certificates"):
+ return
+
+ downloader = stem.descriptor.remote.DescriptorDownloader()
+
+ single_query = downloader.get_key_certificates('D586D18309DED4CD6D57C18FDB97EFA96D330566')
+
+ multiple_query = downloader.get_key_certificates([
+ 'D586D18309DED4CD6D57C18FDB97EFA96D330566',
+ '14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4',
+ ])
+
+ single_query.run()
+ multiple_query.run()
+
+ single_query_results = list(single_query)
+ self.assertEqual(1, len(single_query_results))
+ self.assertEqual('D586D18309DED4CD6D57C18FDB97EFA96D330566', single_query_results[0].fingerprint)
+ self.assertTrue(isinstance(single_query_results[0], stem.descriptor.networkstatus.KeyCertificate))
+
+ self.assertEqual(2, len(list(multiple_query)))
1
0