tor-commits
Threads by month
- ----- 2025 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
July 2013
- 19 participants
- 1045 discussions
commit 5514b2cfd7d313f429bfe1bb8a13c975f7c3413f
Author: Damian Johnson <atagar(a)torproject.org>
Date: Thu Jul 18 09:59:30 2013 -0700
Implementing a get_consensus() method
Originally this was gonna be a get_network_status(fingerprint) method but
evidently we can't request individual router status entries. Understandable
since signatures are for the whole document but still a bit of a pity from an
API perspective. Oh well.
---
stem/descriptor/__init__.py | 2 +-
stem/descriptor/remote.py | 60 +++++++++++++++++++++++++++++----------
test/integ/descriptor/remote.py | 32 ++++++++++++++++++++-
test/runner.py | 1 +
4 files changed, 78 insertions(+), 17 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index a527329..82f846d 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -132,7 +132,7 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param stem.descriptor.__init__.DocumentHandler document_handler: method in
- which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index a18d991..66e6d85 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -82,6 +82,8 @@ class Query(object):
:var str resource: resource being fetched, such as '/tor/status-vote/current/consensus.z'
:var str descriptor_type: type of descriptors being fetched, see
:func:`~stem.descriptor.__init__.parse_file`
+ :param stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:var list endpoints: (address, dirport) tuples of the authority or mirror
we're querying, this uses authorities if undefined
@@ -101,9 +103,10 @@ class Query(object):
finished
"""
- def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True):
+ def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
self.resource = resource
self.descriptor_type = descriptor_type
+ self.document_handler = document_handler
self.endpoints = endpoints if endpoints else []
self.retries = retries
@@ -220,7 +223,7 @@ class Query(object):
response = io.BytesIO(response.read().strip())
- self._results = stem.descriptor.parse_file(response, self.descriptor_type)
+ self._results = stem.descriptor.parse_file(response, self.descriptor_type, document_handler = self.document_handler)
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
except:
exc = sys.exc_info()[1]
@@ -278,14 +281,14 @@ class DescriptorDownloader(object):
resource = '/tor/server/all'
+ if isinstance(fingerprints, str):
+ fingerprints = [fingerprints]
+
if fingerprints:
- if isinstance(fingerprints, str):
- resource = '/tor/server/fp/%s' % fingerprints
- else:
- if len(fingerprints) > MAX_BATCH_SIZE:
- raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_BATCH_SIZE)
+ if len(fingerprints) > MAX_BATCH_SIZE:
+ raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_BATCH_SIZE)
- resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
return self._query(resource, 'server-descriptor 1.0')
@@ -306,18 +309,44 @@ class DescriptorDownloader(object):
resource = '/tor/extra/all'
+ if isinstance(fingerprints, str):
+ fingerprints = [fingerprints]
+
if fingerprints:
- if isinstance(fingerprints, str):
- resource = '/tor/extra/fp/%s' % fingerprints
- else:
- if len(fingerprints) > MAX_BATCH_SIZE:
- raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_BATCH_SIZE)
+ if len(fingerprints) > MAX_BATCH_SIZE:
+ raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_BATCH_SIZE)
- resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
return self._query(resource, 'extra-info 1.0')
- def _query(self, resource, descriptor_type):
+ def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
+ """
+ Provides the present router status entries.
+
+ :param stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
+ :param str authority_v3ident: fingerprint of the authority key for which
+ to get the consensus, see `'v3ident' in tor's config.c
+ <https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
+ for the values.
+
+ :returns: :class:`~stem.descriptor.remote.Query` for the router status
+ entries
+ """
+
+ resource = '/tor/status-vote/current/consensus'
+
+ if authority_v3ident:
+ resource += '/%s' % authority_v3ident
+
+ return self._query(
+ resource,
+ 'network-status-consensus-3 1.0',
+ document_handler = document_handler,
+ )
+
+ def _query(self, resource, descriptor_type, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
"""
Issues a request for the given resource.
"""
@@ -330,4 +359,5 @@ class DescriptorDownloader(object):
fall_back_to_authority = self.fall_back_to_authority,
timeout = self.timeout,
start = self.start_when_requested,
+ document_handler = document_handler,
)
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index 62348ab..9273b61 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -4,9 +4,10 @@ Integration tests for stem.descriptor.remote.
import unittest
-import stem.descriptor.server_descriptor
import stem.descriptor.extrainfo_descriptor
import stem.descriptor.remote
+import stem.descriptor.router_status_entry
+import stem.descriptor.server_descriptor
import test.runner
# Required to prevent unmarshal error when running this test alone.
@@ -26,6 +27,8 @@ class TestDescriptorReader(unittest.TestCase):
if test.runner.require_online(self):
return
+ elif test.runner.only_run_once(self, "test_using_authorities"):
+ return
queries = []
@@ -51,6 +54,11 @@ class TestDescriptorReader(unittest.TestCase):
Exercises the downloader's get_server_descriptors() method.
"""
+ if test.runner.require_online(self):
+ return
+ elif test.runner.only_run_once(self, "test_get_server_descriptors"):
+ return
+
downloader = stem.descriptor.remote.DescriptorDownloader()
# Fetch a single descriptor and a batch. I'd love to also exercise
@@ -82,6 +90,11 @@ class TestDescriptorReader(unittest.TestCase):
Exercises the downloader's get_extrainfo_descriptors() method.
"""
+ if test.runner.require_online(self):
+ return
+ elif test.runner.only_run_once(self, "test_get_extrainfo_descriptors"):
+ return
+
downloader = stem.descriptor.remote.DescriptorDownloader()
single_query = downloader.get_extrainfo_descriptors('9695DFC35FFEB861329B9F1AB04C46397020CE31')
@@ -101,4 +114,21 @@ class TestDescriptorReader(unittest.TestCase):
self.assertEqual(2, len(list(multiple_query)))
+ def test_get_consensus(self):
+ """
+ Exercises the downloader's get_consensus() method.
+ """
+
+ if test.runner.require_online(self):
+ return
+ elif test.runner.only_run_once(self, "test_get_consensus"):
+ return
+
+ downloader = stem.descriptor.remote.DescriptorDownloader()
+
+ consensus_query = downloader.get_consensus()
+ consensus_query.run()
+ consensus = list(consensus_query)
+ self.assertTrue(len(consensus) > 50)
+ self.assertTrue(isinstance(consensus[0], stem.descriptor.router_status_entry.RouterStatusEntryV3))
diff --git a/test/runner.py b/test/runner.py
index 9bd62f7..a1eda9e 100644
--- a/test/runner.py
+++ b/test/runner.py
@@ -16,6 +16,7 @@ about the tor test instance they're running against.
require_control - skips the test unless tor provides a controller endpoint
require_version - skips the test unless we meet a tor version requirement
require_online - skips unless targets allow for online tests
+ only_run_once - skip the test if it has been run before
exercise_controller - basic sanity check that a controller connection can be used
get_runner - Singleton for fetching our runtime context.
1
0
[stem/master] Adding a query_args parameter to all downloader methods
by atagar@torproject.org 22 Jul '13
by atagar@torproject.org 22 Jul '13
22 Jul '13
commit e95e00dd6431001000a79e2bfc009a7f3abbde80
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 16:31:00 2013 -0700
Adding a query_args parameter to all downloader methods
Simple tweak in case callers want to do something dynamic (such as varying the
timeout with each request).
---
stem/descriptor/remote.py | 34 +++++++++++++++++++++-------------
1 file changed, 21 insertions(+), 13 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 01130f3..a009078 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -406,7 +406,7 @@ class DescriptorDownloader(object):
self._endpoints = list(new_endpoints)
- def get_server_descriptors(self, fingerprints = None):
+ def get_server_descriptors(self, fingerprints = None, **query_args):
"""
Provides the server descriptors with the given fingerprints. If no
fingerprints are provided then this returns all descriptors in the present
@@ -414,6 +414,8 @@ class DescriptorDownloader(object):
:param str,list fingerprints: fingerprint or list of fingerprints to be
retrieved, gets all descriptors if **None**
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the server descriptors
@@ -432,9 +434,9 @@ class DescriptorDownloader(object):
resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
- return self.query(resource)
+ return self.query(resource, **query_args)
- def get_extrainfo_descriptors(self, fingerprints = None):
+ def get_extrainfo_descriptors(self, fingerprints = None, **query_args):
"""
Provides the extrainfo descriptors with the given fingerprints. If no
fingerprints are provided then this returns all descriptors in the present
@@ -442,6 +444,8 @@ class DescriptorDownloader(object):
:param str,list fingerprints: fingerprint or list of fingerprints to be
retrieved, gets all descriptors if **None**
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the extrainfo descriptors
@@ -460,9 +464,9 @@ class DescriptorDownloader(object):
resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
- return self.query(resource)
+ return self.query(resource, **query_args)
- def get_microdescriptors(self, hashes):
+ def get_microdescriptors(self, hashes, **query_args):
"""
Provides the microdescriptors with the given hashes. To get these see the
'microdescriptor_hashes' attribute of
@@ -472,6 +476,8 @@ class DescriptorDownloader(object):
:param str,list hashes: microdescriptor hash or list of hashes to be
retrieved
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the microdescriptors
@@ -485,9 +491,9 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
- return self.query('/tor/micro/d/%s.z' % '-'.join(hashes))
+ return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)
- def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
+ def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None, **query_args):
"""
Provides the present router status entries.
@@ -497,6 +503,8 @@ class DescriptorDownloader(object):
to get the consensus, see `'v3ident' in tor's config.c
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
for the values.
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the router status
entries
@@ -507,14 +515,14 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- return self.query(resource + '.z', document_handler = document_handler)
+ return self.query(resource + '.z', document_handler = document_handler, **query_args)
- def query(self, resource, **kwargs):
+ def query(self, resource, **query_args):
"""
Issues a request for the given resource.
:param str resource: resource being fetched, such as '/tor/server/all.z'
- :param kwargs: additional arguments for the
+ :param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the descriptors
@@ -523,11 +531,11 @@ class DescriptorDownloader(object):
type can't be determined when 'descriptor_type' is **None**
"""
- query_args = dict(self._default_args)
- query_args.update(kwargs)
+ args = dict(self._default_args)
+ args.update(query_args)
return Query(
resource,
endpoints = self._endpoints,
- **query_args
+ **args
)
1
0
commit 6f12acc9e54842371f52203550a6ef2f9cbfb835
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 16:25:58 2013 -0700
Simplifying DescriptorDownloader class
The DescriptorDownloader is a very, very simple class that simply issues
Queries on your behalf. Why should it pretend to be anything else?
Dropping most of its attributes (and with it a lot of copy-and-pasted pydocs).
Also making the query() method public.
---
stem/descriptor/remote.py | 62 ++++++++++++++++++---------------------------
1 file changed, 25 insertions(+), 37 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 777f769..01130f3 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -51,7 +51,8 @@ itself...
|- get_server_descriptors - provides present :class:`~stem.descriptor.stem.descriptor.server_descriptor.ServerDescriptor`
|- get_extrainfo_descriptors - provides present :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
|- get_microdescriptors - provides present :class:`~stem.descriptor.microdescriptor.Microdescriptor`
- +- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
+ |- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
+ +- query - request an arbitrary descriptor resource
.. data:: MAX_DESCRIPTOR_BATCH_SIZE
@@ -365,35 +366,18 @@ class Query(object):
class DescriptorDownloader(object):
"""
- Configurable class through which descriptors can be downloaded. This provides
- caching, retries, and other capabilities to make downloading descriptors easy
- and efficient.
-
- For more advanced use cases you can use the
- :class:`~stem.descriptor.remote.Query` class directly.
+ Configurable class that issues :class:`~stem.descriptor.remote.Query`
+ instances on your behalf.
:param bool use_mirrors: downloads the present consensus and uses the directory
mirrors to fetch future requests, this fails silently if the consensus
cannot be downloaded
-
- :var int retries: number of times to attempt the request if it fails
- :var bool fall_back_to_authority: when retrying request issues the last
- request to a directory authority if **True**
- :var float timeout: duration before we'll time out our request, no timeout is
- applied if **None**
- :var bool start_when_requested: issues requests when our methods are called
- if **True**, otherwise provides non-running
- :class:`~stem.descriptor.remote.Query` instances
- :var bool validate: checks the validity of the descriptor's content if
- **True**, skips these checks otherwise
+ :param default_args: default arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
"""
- def __init__(self, use_mirrors = False, retries = 2, fall_back_to_authority = True, timeout = None, start_when_requested = True, validate = True):
- self.retries = retries
- self.fall_back_to_authority = fall_back_to_authority
- self.timeout = timeout
- self.start_when_requested = start_when_requested
- self.validate = validate
+ def __init__(self, use_mirrors = False, **default_args):
+ self._default_args = default_args
self._endpoints = DIRECTORY_AUTHORITIES.values()
if use_mirrors:
@@ -448,7 +432,7 @@ class DescriptorDownloader(object):
resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
- return self._query(resource)
+ return self.query(resource)
def get_extrainfo_descriptors(self, fingerprints = None):
"""
@@ -476,7 +460,7 @@ class DescriptorDownloader(object):
resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
- return self._query(resource)
+ return self.query(resource)
def get_microdescriptors(self, hashes):
"""
@@ -501,7 +485,7 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
- return self._query('/tor/micro/d/%s.z' % '-'.join(hashes))
+ return self.query('/tor/micro/d/%s.z' % '-'.join(hashes))
def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
"""
@@ -523,23 +507,27 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- return self._query(resource + '.z', document_handler = document_handler)
+ return self.query(resource + '.z', document_handler = document_handler)
- def _query(self, resource, descriptor_type = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ def query(self, resource, **kwargs):
"""
Issues a request for the given resource.
+
+ :param str resource: resource being fetched, such as '/tor/server/all.z'
+ :param kwargs: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
+
+ :returns: :class:`~stem.descriptor.remote.Query` for the descriptors
+
+ :raises: **ValueError** if resource is clearly invalid or the descriptor
+ type can't be determined when 'descriptor_type' is **None**
"""
- log.trace("Retrieving descriptors (resource: %s, type: %s)" % (resource, descriptor_type))
+ query_args = dict(self._default_args)
+ query_args.update(kwargs)
return Query(
resource,
- descriptor_type,
endpoints = self._endpoints,
- retries = self.retries,
- fall_back_to_authority = self.fall_back_to_authority,
- timeout = self.timeout,
- start = self.start_when_requested,
- validate = self.validate,
- document_handler = document_handler,
+ **query_args
)
1
0
commit 25951d4f12a16a1fa6e42ea6b7479959d636139b
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sat Jul 20 10:00:04 2013 -0700
Making descriptor validation optional
Adding a 'validate' argument that can be passed through when parsing
descriptors.
---
stem/descriptor/remote.py | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index e0ca612..83416c8 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -84,8 +84,6 @@ class Query(object):
:var str resource: resource being fetched, such as '/tor/status-vote/current/consensus.z'
:var str descriptor_type: type of descriptors being fetched, see
:func:`~stem.descriptor.__init__.parse_file`
- :param stem.descriptor.__init__.DocumentHandler document_handler: method in
- which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:var list endpoints: (address, dirport) tuples of the authority or mirror
we're querying, this uses authorities if undefined
@@ -103,12 +101,16 @@ class Query(object):
:var float timeout: duration before we'll time out our request
:var float runtime: time our query took, this is **None** if it's not yet
finished
+
+ :var bool validate: checks the validity of the descriptor's content if
+ **True**, skips these checks otherwise
+ :var stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
"""
- def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
self.resource = resource
self.descriptor_type = descriptor_type
- self.document_handler = document_handler
self.endpoints = endpoints if endpoints else []
self.retries = retries
@@ -122,6 +124,9 @@ class Query(object):
self.timeout = timeout
self.runtime = None
+ self.validate = validate
+ self.document_handler = document_handler
+
self._downloader_thread = None
self._downloader_thread_lock = threading.RLock()
@@ -225,7 +230,7 @@ class Query(object):
response = io.BytesIO(response.read().strip())
- self._results = stem.descriptor.parse_file(response, self.descriptor_type, document_handler = self.document_handler)
+ self._results = stem.descriptor.parse_file(response, self.descriptor_type, validate = self.validate, document_handler = self.document_handler)
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
except:
exc = sys.exc_info()[1]
@@ -258,15 +263,18 @@ class DescriptorDownloader(object):
:var bool start_when_requested: issues requests when our methods are called
if **True**, otherwise provides non-running
:class:`~stem.descriptor.remote.Query` instances
+ :var bool validate: checks the validity of the descriptor's content if
+ **True**, skips these checks otherwise
:var bool fall_back_to_authority: when retrying request issues the last
request to a directory authority if **True**
"""
- def __init__(self, retries = 2, use_mirrors = False, fall_back_to_authority = True, timeout = None, start_when_requested = True):
+ def __init__(self, retries = 2, use_mirrors = False, fall_back_to_authority = True, timeout = None, start_when_requested = True, validate = True):
self.retries = retries
self.timeout = timeout
self.start_when_requested = start_when_requested
self.fall_back_to_authority = fall_back_to_authority
+ self.validate = validate
self._endpoints = DIRECTORY_AUTHORITIES.values()
if use_mirrors:
@@ -422,5 +430,6 @@ class DescriptorDownloader(object):
fall_back_to_authority = self.fall_back_to_authority,
timeout = self.timeout,
start = self.start_when_requested,
+ validate = self.validate,
document_handler = document_handler,
)
1
0
commit 60ad230d88b383a259c531649fdba78d0acd6493
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 09:03:39 2013 -0700
Expanding remote descriptor pydocs
Adding documentation and examples for the remote descriptor API. This includes
some minor API tweaks and a missing import statement.
---
stem/descriptor/__init__.py | 1 +
stem/descriptor/remote.py | 243 ++++++++++++++++++++++++++++-----------
test/integ/descriptor/remote.py | 5 +-
test/settings.cfg | 2 +-
test/unit/descriptor/remote.py | 2 +-
5 files changed, 181 insertions(+), 72 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 82f846d..f1fdee4 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -154,6 +154,7 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
import stem.descriptor.server_descriptor
import stem.descriptor.extrainfo_descriptor
import stem.descriptor.networkstatus
+ import stem.descriptor.microdescriptor
# The tor descriptor specifications do not provide a reliable method for
# identifying a descriptor file's type and version so we need to guess
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 83416c8..8ec6f1d 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -2,14 +2,16 @@
# See LICENSE for licensing information
"""
-Utilities for retrieving descriptors from directory authorities and mirrors.
-This is mostly done through the
+Module for remotely retrieving descriptors from directory authorities and
+mirrors. This is most easily done through the
:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues
-:class:`~stem.descriptor.remote.Query` to get descriptor content. For
-example...
+:class:`~stem.descriptor.remote.Query` instances to get you the descriptor
+content. For example...
::
+ from stem.descriptor.remote import DescriptorDownloader
+
downloader = DescriptorDownloader(
cache = '/tmp/descriptor_cache',
use_mirrors = True,
@@ -27,7 +29,7 @@ example...
print
print "Query took %0.2f seconds" % query.runtime
except Exception as exc:
- print "Unable to query the server descriptors: %s" % query.error
+ print "Unable to retrieve the server descriptors: %s" % exc
If you don't care about errors then you can also simply iterate over the query
itself...
@@ -37,6 +39,33 @@ itself...
for desc in downloader.get_server_descriptors():
if desc.exit_policy.is_exiting_allowed():
print " %s (%s)" % (desc.nickname, desc.fingerprint)
+
+::
+
+ Query - Asynchronous request to download tor descriptors
+ |- start - issues the query if it isn't already running
+ +- run - blocks until the request is finished and provides the results
+
+ DescriptorDownloader - Configurable class for issuing queries
+ |- use_directory_mirrors - use directory mirrors to download future descriptors
+ |- get_server_descriptors - provides present :class:`~stem.descriptor.stem.descriptor.server_descriptor.ServerDescriptor`
+ |- get_extrainfo_descriptors - provides present :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
+ |- get_microdescriptors - provides present :class:`~stem.descriptor.microdescriptor.Microdescriptor`
+ +- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
+
+.. data:: MAX_DESCRIPTOR_BATCH_SIZE
+
+ Maximum number of server or extrainfo descriptors that can be requested at a
+ time by their fingerprints.
+
+.. data:: MAX_MICRODESCRIPTOR_BATCH_SIZE
+
+ Maximum number of microdescriptors that can be requested at a time by their
+ hashes.
+
+.. data:: DIRECTORY_AUTHORITIES
+
+ Mapping of directory authority nicknames to their (address, dirport) tuple.
"""
import io
@@ -51,8 +80,8 @@ import stem.descriptor
from stem import Flag
from stem.util import log
-# Tor has a limit on the number of descriptors we can fetch explicitly by their
-# fingerprint or hashes due to the url lenght of squid proxies.
+# Tor has a limited number of descriptors we can fetch explicitly by their
+# fingerprint or hashes due to a limit on the url length by squid proxies.
MAX_DESCRIPTOR_BATCH_SIZE = 96
MAX_MICRODESCRIPTOR_BATCH_SIZE = 92
@@ -75,15 +104,87 @@ DIRECTORY_AUTHORITIES = {
}
+def _guess_descriptor_type(resource):
+ # Attempts to determine the descriptor type based on the resource url. This
+ # raises a ValueError if the resource isn't recognized.
+
+ if resource.startswith('/tor/server/'):
+ return 'server-descriptor 1.0'
+ elif resource.startswith('/tor/extra/'):
+ return 'extra-info 1.0'
+ elif resource.startswith('/tor/micro/'):
+ return 'microdescriptor 1.0'
+ elif resource.startswith('/tor/status-vote/'):
+ return 'network-status-consensus-3 1.0'
+ else:
+ raise ValueError("Unable to determine the descriptor type for '%s'" % resource)
+
+
class Query(object):
"""
Asynchronous request for descriptor content from a directory authority or
- mirror. The caller can block on the response by either calling
- :func:~stem.descriptor.remote.run: or iterating over our descriptor content.
+ mirror. These can either be made through the
+ :class:`~stem.descriptor.remote.DescriptorDownloader` or directly for more
+ advanced usage.
+
+ To block on the response and get results either call
+ :func:`~stem.descriptor.remote.Query.run` or iterate over the Query. The
+ :func:`~stem.descriptor.remote.run` method passes along any errors that
+ arise...
+
+ ::
+
+ from stem.descriptor.remote import Query
+
+ query = Query(
+ '/tor/server/all.z',
+ 'server-descriptor 1.0',
+ timeout = 30,
+ )
+
+ print "Current relays:"
+
+ try:
+ for desc in query.run():
+ print desc.fingerprint
+ except Exception as exc:
+ print "Unable to retrieve the server descriptors: %s" % exc
+
+ ... while iterating fails silently...
+
+ ::
- :var str resource: resource being fetched, such as '/tor/status-vote/current/consensus.z'
- :var str descriptor_type: type of descriptors being fetched, see
- :func:`~stem.descriptor.__init__.parse_file`
+ print "Current relays:"
+
+ for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'):
+ print desc.fingerprint
+
+ In either case exceptions are available via our 'error' attribute.
+
+ Tor provides quite a few different descriptor resources via its directory
+ protocol (see section 4.2 and later of the `dir-spec
+ <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).
+ Commonly useful ones include...
+
+ ======== ===========
+ Resource Description
+ ======== ===========
+ /tor/server/all.z all present server descriptors
+ /tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints
+ /tor/extra/all.z all present extrainfo descriptors
+ /tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints
+ /tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes
+ /tor/status-vote/current/consensus.z present consensus
+ ======== ===========
+
+ The '.z' suffix can be excluded to get a plaintext rather than compressed
+ response. Compression is handled transparently, so this shouldn't matter to
+ the caller.
+
+ :var str resource: resource being fetched, such as '/tor/server/all.z'
+ :var str descriptor_type: type of descriptors being fetched (for options see
+ :func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
+ resource if **None**
:var list endpoints: (address, dirport) tuples of the authority or mirror
we're querying, this uses authorities if undefined
@@ -108,9 +209,16 @@ class Query(object):
which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
"""
- def __init__(self, resource, descriptor_type, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ if not resource.startswith('/'):
+ raise ValueError("Resources should start with a '/': %s" % resource)
+
self.resource = resource
- self.descriptor_type = descriptor_type
+
+ if descriptor_type:
+ self.descriptor_type = descriptor_type
+ else:
+ self.descriptor_type = _guess_descriptor_type(resource)
self.endpoints = endpoints if endpoints else []
self.retries = retries
@@ -135,24 +243,6 @@ class Query(object):
if start:
self.start()
- def pick_url(self, use_authority = False):
- """
- Provides a url that can be queried. If we have multiple endpoints then one
- will be picked randomly.
-
- :param bool use_authority: ignores our endpoints and uses a directory
- authority instead
-
- :returns: **str** for the url being queried by this request
- """
-
- if use_authority or not self.endpoints:
- address, dirport = random.choice(DIRECTORY_AUTHORITIES.values())
- else:
- address, dirport = random.choice(self.endpoints)
-
- return "http://%s:%i/%s" % (address, dirport, self.resource.lstrip('/'))
-
def start(self):
"""
Starts downloading the scriptors if we haven't started already.
@@ -160,7 +250,12 @@ class Query(object):
with self._downloader_thread_lock:
if self._downloader_thread is None:
- self._downloader_thread = threading.Thread(target = self._download_descriptors, name="Descriptor Query", args = (self.retries,))
+ self._downloader_thread = threading.Thread(
+ name = "Descriptor Query",
+ target = self._download_descriptors,
+ args = (self.retries,)
+ )
+
self._downloader_thread.setDaemon(True)
self._downloader_thread.start()
@@ -190,38 +285,57 @@ class Query(object):
self._downloader_thread.join()
if self.error:
- if not suppress:
- raise self.error
+ if suppress:
+ return
+
+ raise self.error
else:
if self._results is None:
- if not suppress:
- raise ValueError('BUG: _download_descriptors() finished without either results or an error')
+ if suppress:
+ return
- return
+ raise ValueError('BUG: _download_descriptors() finished without either results or an error')
try:
for desc in self._results:
yield desc
except ValueError as exc:
- # encountered a parsing error
+ self.error = exc # encountered a parsing error
- self.error = exc
+ if suppress:
+ return
- if not suppress:
- raise self.error
+ raise self.error
def __iter__(self):
for desc in self.run(True):
yield desc
+ def _pick_url(self, use_authority = False):
+ """
+ Provides a url that can be queried. If we have multiple endpoints then one
+ will be picked randomly.
+
+ :param bool use_authority: ignores our endpoints and uses a directory
+ authority instead
+
+ :returns: **str** for the url being queried by this request
+ """
+
+ if use_authority or not self.endpoints:
+ address, dirport = random.choice(DIRECTORY_AUTHORITIES.values())
+ else:
+ address, dirport = random.choice(self.endpoints)
+
+ return "http://%s:%i/%s" % (address, dirport, self.resource.lstrip('/'))
+
def _download_descriptors(self, retries):
try:
use_authority = retries == 0 and self.fall_back_to_authority
- self.download_url = self.pick_url(use_authority)
+ self.download_url = self._pick_url(use_authority)
self.start_time = time.time()
response = urllib2.urlopen(self.download_url, timeout = self.timeout)
- self.runtime = time.time() - self.start_time
# This sucks. We need to read the full response into memory before
# processing the content. This is because urllib2 returns a 'file like'
@@ -229,8 +343,9 @@ class Query(object):
# own buffer that does support these.
response = io.BytesIO(response.read().strip())
-
self._results = stem.descriptor.parse_file(response, self.descriptor_type, validate = self.validate, document_handler = self.document_handler)
+
+ self.runtime = time.time() - self.start_time
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
except:
exc = sys.exc_info()[1]
@@ -254,26 +369,27 @@ class DescriptorDownloader(object):
For more advanced use cases you can use the
:class:`~stem.descriptor.remote.Query` class directly.
+ :param bool use_mirrors: downloads the present consensus and uses the directory
+ mirrors to fetch future requests, this fails silently if the consensus
+ cannot be downloaded
+
:var int retries: number of times to attempt the request if it fails
+ :var bool fall_back_to_authority: when retrying request issues the last
+ request to a directory authority if **True**
:var float timeout: duration before we'll time out our request, no timeout is
applied if **None**
- :var bool use_mirrors: downloads the present consensus and uses the directory
- mirrors to fetch future requests, this fails silently if the consensus
- cannot be downloaded
:var bool start_when_requested: issues requests when our methods are called
if **True**, otherwise provides non-running
:class:`~stem.descriptor.remote.Query` instances
:var bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
- :var bool fall_back_to_authority: when retrying request issues the last
- request to a directory authority if **True**
"""
- def __init__(self, retries = 2, use_mirrors = False, fall_back_to_authority = True, timeout = None, start_when_requested = True, validate = True):
+ def __init__(self, use_mirrors = False, retries = 2, fall_back_to_authority = True, timeout = None, start_when_requested = True, validate = True):
self.retries = retries
+ self.fall_back_to_authority = fall_back_to_authority
self.timeout = timeout
self.start_when_requested = start_when_requested
- self.fall_back_to_authority = fall_back_to_authority
self.validate = validate
self._endpoints = DIRECTORY_AUTHORITIES.values()
@@ -282,7 +398,7 @@ class DescriptorDownloader(object):
start_time = time.time()
self.use_directory_mirrors()
log.debug("Retrieve directory mirrors (took %0.2fs)" % (time.time() - start_time))
- except Exception, exc:
+ except Exception as exc:
log.debug("Unable to retrieve directory mirrors: %s" % exc)
def use_directory_mirrors(self):
@@ -295,10 +411,7 @@ class DescriptorDownloader(object):
new_endpoints = set(DIRECTORY_AUTHORITIES.values())
- query = self.get_consensus()
- query.run() # running explicitly so we'll raise errors
-
- for desc in query:
+ for desc in self.get_consensus().run():
if Flag.V2DIR in desc.flags:
new_endpoints.add((desc.address, desc.dir_port))
@@ -332,7 +445,7 @@ class DescriptorDownloader(object):
resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
- return self._query(resource, 'server-descriptor 1.0')
+ return self._query(resource)
def get_extrainfo_descriptors(self, fingerprints = None):
"""
@@ -360,7 +473,7 @@ class DescriptorDownloader(object):
resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
- return self._query(resource, 'extra-info 1.0')
+ return self._query(resource)
def get_microdescriptors(self, hashes):
"""
@@ -385,9 +498,7 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
- resource = '/tor/micro/d/%s' % '-'.join(hashes)
-
- return self._query(resource, 'microdescriptor 1.0')
+ return self._query('/tor/micro/d/%s' % '-'.join(hashes))
def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
"""
@@ -409,13 +520,9 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- return self._query(
- resource,
- 'network-status-consensus-3 1.0',
- document_handler = document_handler,
- )
+ return self._query(resource, document_handler = document_handler)
- def _query(self, resource, descriptor_type, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+ def _query(self, resource, descriptor_type = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
"""
Issues a request for the given resource.
"""
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index e9d4e8a..7c45118 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -15,7 +15,8 @@ import test.runner
import stem.descriptor.networkstatus
-class TestDescriptorReader(unittest.TestCase):
+
+class TestDescriptorDownloader(unittest.TestCase):
def test_using_authorities(self):
"""
Fetches a descriptor from each of the directory authorities. This is
@@ -44,7 +45,7 @@ class TestDescriptorReader(unittest.TestCase):
for query in queries:
try:
descriptors = list(query.run())
- except Exception, exc:
+ except Exception as exc:
self.fail("Unable to use %s (%s:%s, %s): %s" % (authority, address, dirport, type(exc), exc))
self.assertEqual(1, len(descriptors))
diff --git a/test/settings.cfg b/test/settings.cfg
index d0fd5b0..80aaf9a 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -187,7 +187,7 @@ test.integ_tests
|test.integ.util.proc.TestProc
|test.integ.util.system.TestSystem
|test.integ.descriptor.reader.TestDescriptorReader
-|test.integ.descriptor.remote.TestDescriptorReader
+|test.integ.descriptor.remote.TestDescriptorDownloader
|test.integ.descriptor.server_descriptor.TestServerDescriptor
|test.integ.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor
|test.integ.descriptor.microdescriptor.TestMicrodescriptor
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index 3aadaac..fb2e3f0 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -71,7 +71,7 @@ class TestDescriptorDownloader(unittest.TestCase):
)
expeced_url = 'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020C…'
- self.assertEqual(expeced_url, query.pick_url())
+ self.assertEqual(expeced_url, query._pick_url())
descriptors = list(query)
self.assertEqual(1, len(descriptors))
1
0
commit 90ec648ed075245f6ed2bae42dd4c6968e15fab1
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 15:57:47 2013 -0700
Support for compressed descriptors
Ummm... well, that was easy. When I was initially looking into tor's directory
responses I had trouble getting valid compressed results. Kostas suspected
that the responses might have been truncated. Oh well, maybe a wget thing. In
either case adding compression support to our module was trivial.
---
stem/descriptor/remote.py | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 8ec6f1d..777f769 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -74,6 +74,7 @@ import sys
import threading
import time
import urllib2
+import zlib
import stem.descriptor
@@ -335,15 +336,17 @@ class Query(object):
self.download_url = self._pick_url(use_authority)
self.start_time = time.time()
- response = urllib2.urlopen(self.download_url, timeout = self.timeout)
+ response = urllib2.urlopen(self.download_url, timeout = self.timeout).read()
- # This sucks. We need to read the full response into memory before
- # processing the content. This is because urllib2 returns a 'file like'
- # object that lacks tell() or seek(). Hence we need to read it into our
- # own buffer that does support these.
+ if self.download_url.endswith('.z'):
+ response = zlib.decompress(response)
- response = io.BytesIO(response.read().strip())
- self._results = stem.descriptor.parse_file(response, self.descriptor_type, validate = self.validate, document_handler = self.document_handler)
+ self._results = stem.descriptor.parse_file(
+ io.BytesIO(response.strip()),
+ self.descriptor_type,
+ validate = self.validate,
+ document_handler = self.document_handler,
+ )
self.runtime = time.time() - self.start_time
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
@@ -434,7 +437,7 @@ class DescriptorDownloader(object):
fingerprints (this is due to a limit on the url length by squid proxies).
"""
- resource = '/tor/server/all'
+ resource = '/tor/server/all.z'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
@@ -443,7 +446,7 @@ class DescriptorDownloader(object):
if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
- resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
return self._query(resource)
@@ -462,7 +465,7 @@ class DescriptorDownloader(object):
fingerprints (this is due to a limit on the url length by squid proxies).
"""
- resource = '/tor/extra/all'
+ resource = '/tor/extra/all.z'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
@@ -471,7 +474,7 @@ class DescriptorDownloader(object):
if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
- resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
return self._query(resource)
@@ -498,7 +501,7 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
- return self._query('/tor/micro/d/%s' % '-'.join(hashes))
+ return self._query('/tor/micro/d/%s.z' % '-'.join(hashes))
def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
"""
@@ -520,7 +523,7 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- return self._query(resource, document_handler = document_handler)
+ return self._query(resource + '.z', document_handler = document_handler)
def _query(self, resource, descriptor_type = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
"""
1
0
commit 7f050ebfa475aeb39f6b1b4a0f7ad53620931646
Merge: b6c23b0 713b046
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 18:39:17 2013 -0700
Remote descriptor fetching module
What started as a tor-dev@ thread has finally culminated in a surprisingly
simple module. This provides the ability to download tor descriptors from
directory mirrors or authorities, and is made up of two classes...
* Query
The core of the module. This is a request to download a given descriptor
resource from a pool of authorities and/or mirrors. This retries failed
requests and can be iterated over to get the descriptor results.
* DescriptorDownloader
A configurable class for issuing Queries. This provides only two things:
simple methods to get common descriptor types, and an easy way of specifying
default Query parameters.
https://lists.torproject.org/pipermail/tor-dev/2013-May/004924.html
https://trac.torproject.org/8257
docs/api.rst | 5 +-
docs/api/descriptor/remote.rst | 5 +
docs/contents.rst | 1 +
stem/control.py | 4 +-
stem/descriptor/__init__.py | 21 +-
stem/descriptor/extrainfo_descriptor.py | 18 +-
stem/descriptor/microdescriptor.py | 13 +-
stem/descriptor/networkstatus.py | 71 +++-
stem/descriptor/remote.py | 581 +++++++++++++++++++++++++++++
stem/descriptor/router_status_entry.py | 16 +-
stem/descriptor/server_descriptor.py | 29 +-
test/integ/descriptor/remote.py | 210 +++++++++++
test/runner.py | 1 +
test/settings.cfg | 4 +-
test/unit/descriptor/remote.py | 125 +++++++
test/unit/descriptor/server_descriptor.py | 8 +-
16 files changed, 1056 insertions(+), 56 deletions(-)
1
0
22 Jul '13
commit 713b04632e6ca1120c6d5c2661206fd84c01904b
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 18:37:48 2013 -0700
Adding remote descriptor sphinx docs to site
Tidying up our pydocs and including it in our site.
---
docs/api.rst | 5 +++-
docs/api/descriptor/remote.rst | 5 ++++
docs/contents.rst | 1 +
stem/descriptor/remote.py | 62 +++++++++++++++++++++-------------------
4 files changed, 43 insertions(+), 30 deletions(-)
diff --git a/docs/api.rst b/docs/api.rst
index 92eb200..b6a5d8f 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -22,7 +22,9 @@ Descriptors
To read descriptors from disk use :func:`~stem.descriptor.__init__.parse_file` for
individual files and `stem.descriptor.reader
-<api/descriptor/reader.html>`_ for batches.
+<api/descriptor/reader.html>`_ for batches. You can also use
+`stem.descriptor.remote <api/descriptor/remote.html>`_ to download descriptors
+remotely like tor does.
* **Classes**
@@ -34,6 +36,7 @@ individual files and `stem.descriptor.reader
* `stem.descriptor.router_status_entry <api/descriptor/router_status_entry.html>`_ - Relay entries within a network status document.
* `stem.descriptor.reader <api/descriptor/reader.html>`_ - Reads and parses descriptor files from disk.
+* `stem.descriptor.remote <api/descriptor/remote.html>`_ - Downloads descriptors from directory mirrors and authorities.
* `stem.descriptor.export <api/descriptor/export.html>`_ - Exports descriptors to other formats.
Utilities
diff --git a/docs/api/descriptor/remote.rst b/docs/api/descriptor/remote.rst
new file mode 100644
index 0000000..eb0d30a
--- /dev/null
+++ b/docs/api/descriptor/remote.rst
@@ -0,0 +1,5 @@
+Descriptor Remote
+=================
+
+.. automodule:: stem.descriptor.remote
+
diff --git a/docs/contents.rst b/docs/contents.rst
index 834ce0a..7c9720b 100644
--- a/docs/contents.rst
+++ b/docs/contents.rst
@@ -35,6 +35,7 @@ Contents
api/descriptor/export
api/descriptor/reader
+ api/descriptor/remote
api/util/conf
api/util/connection
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 0aabd74..da1df63 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -13,8 +13,8 @@ content. For example...
from stem.descriptor.remote import DescriptorDownloader
downloader = DescriptorDownloader(
- cache = '/tmp/descriptor_cache',
use_mirrors = True,
+ timeout = 10,
)
query = downloader.get_server_descriptors()
@@ -48,19 +48,19 @@ itself...
DescriptorDownloader - Configurable class for issuing queries
|- use_directory_mirrors - use directory mirrors to download future descriptors
- |- get_server_descriptors - provides present :class:`~stem.descriptor.stem.descriptor.server_descriptor.ServerDescriptor`
- |- get_extrainfo_descriptors - provides present :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
- |- get_microdescriptors - provides present :class:`~stem.descriptor.microdescriptor.Microdescriptor`
- |- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
- |- get_key_certificates - provides present :class:`~stem.descriptor.networkstatus.KeyCertificate`
+ |- get_server_descriptors - provides present server descriptors
+ |- get_extrainfo_descriptors - provides present extrainfo descriptors
+ |- get_microdescriptors - provides present microdescriptors
+ |- get_consensus - provides the present consensus or router status entries
+ |- get_key_certificates - provides present authority key certificates
+- query - request an arbitrary descriptor resource
-.. data:: MAX_DESCRIPTOR_BATCH_SIZE
+.. data:: MAX_FINGERPRINTS
- Maximum number of server or extrainfo descriptors that can requested at a
- time by their fingerprints.
+ Maximum number of descriptors that can requested at a time by their
+ fingerprints.
-.. data:: MAX_MICRODESCRIPTOR_BATCH_SIZE
+.. data:: MAX_MICRODESCRIPTOR_HASHES
Maximum number of microdescriptors that can requested at a time by their
hashes.
@@ -86,8 +86,8 @@ from stem.util import log
# Tor has a limited number of descriptors we can fetch explicitly by their
# fingerprint or hashes due to a limit on the url length by squid proxies.
-MAX_DESCRIPTOR_BATCH_SIZE = 96
-MAX_MICRODESCRIPTOR_BATCH_SIZE = 92
+MAX_FINGERPRINTS = 96
+MAX_MICRODESCRIPTOR_HASHES = 92
# Tor directory authorities as of commit f631b73 (7/4/13). This should only
# include authorities with 'v3ident':
@@ -134,7 +134,7 @@ class Query(object):
To block on the response and get results either call
:func:`~stem.descriptor.remote.Query.run` or iterate over the Query. The
- :func:`~stem.descriptor.remote.run` method pass along any errors that
+ :func:`~stem.descriptor.remote.Query.run` method pass along any errors that
arise...
::
@@ -143,7 +143,7 @@ class Query(object):
query = Query(
'/tor/server/all.z',
- 'server-descriptor 1.0',
+ descriptor_type = 'server-descriptor 1.0',
timeout = 30,
)
@@ -171,16 +171,18 @@ class Query(object):
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_).
Commonly useful ones include...
- ======== ===========
+ ===================================== ===========
Resource Description
- ======== ===========
+ ===================================== ===========
/tor/server/all.z all present server descriptors
/tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints
/tor/extra/all.z all present extrainfo descriptors
/tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints
/tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes
/tor/status-vote/current/consensus.z present consensus
- ======== ===========
+ /tor/keys/all.z key certificates for the authorities
+ /tor/keys/fp/<v3ident1>+<v3ident2>.z key certificates for specific authorities
+ ===================================== ===========
The '.z' suffix can be excluded to get a plaintext rather than compressed
response. Compression is handled transparently, so this shouldn't matter to
@@ -211,7 +213,9 @@ class Query(object):
:var bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:var stem.descriptor.__init__.DocumentHandler document_handler: method in
- which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
+ which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
+
+ :param bool start: start making the request when constructed (default is **True**)
"""
def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = True, timeout = None, start = True, validate = True, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
@@ -432,8 +436,8 @@ class DescriptorDownloader(object):
fingerprints = [fingerprints]
if fingerprints:
- if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
- raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
+ if len(fingerprints) > MAX_FINGERPRINTS:
+ raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)
resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
@@ -462,8 +466,8 @@ class DescriptorDownloader(object):
fingerprints = [fingerprints]
if fingerprints:
- if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
- raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
+ if len(fingerprints) > MAX_FINGERPRINTS:
+ raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_FINGERPRINTS)
resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
@@ -491,21 +495,21 @@ class DescriptorDownloader(object):
if isinstance(hashes, str):
hashes = [hashes]
- if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
- raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
+ if len(hashes) > MAX_MICRODESCRIPTOR_HASHES:
+ raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_HASHES)
return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)
- def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None, **query_args):
+ def get_consensus(self, authority_v3ident = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **query_args):
"""
Provides the present router status entries.
- :param stem.descriptor.__init__.DocumentHandler document_handler: method in
- which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
:param str authority_v3ident: fingerprint of the authority key for which
to get the consensus, see `'v3ident' in tor's config.c
<https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
for the values.
+ :param stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
@@ -546,8 +550,8 @@ class DescriptorDownloader(object):
authority_v3idents = [authority_v3idents]
if authority_v3idents:
- if len(authority_v3idents) > MAX_DESCRIPTOR_BATCH_SIZE:
- raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
+ if len(authority_v3idents) > MAX_FINGERPRINTS:
+ raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_FINGERPRINTS)
resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)
1
0
commit d5b3ec93f44de01b21b27264e761fe8f09ec8012
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 10:45:56 2013 -0700
Fixing "cannot unmarshal code" errors
Damnit python, your import scheme is stupidly confusing.
The descriptor's __init__ module has a circular dependency with its contents.
This is because the parse_file() function calls the constituent modules, while
those modules need the Descriptor class from __init__.
So far so good. Only trouble is that python's support for circular dependencies
sucks. To address this I did lazy imports in __init__, so we imported within
the parse_file() function.
On the surface this seemed to work. All the tests certainly passed. The trouble
is that this style of python import is buggy as hell. Turns out that lazy
imports leave the module in question in an unexecutable state so this *only*
works if you've also imported the module another time during the interpreter
execution. Our tests did this, hence passing tests.
I first encountered "cannot unmarshal code" while writing the remote descriptor
tests (both unit and integ). I was content to hack around this with superfluous
import statements while this only manifested within the tests, but now I'm
seeing it during general usage too...
>>> from stem.descriptor.remote import DescriptorDownloader
>>> list(DescriptorDownloader().get_microdescriptors('jzcx+1fHsi47Tu+vQIcyItgn4lKs6aKnFshQ0lZ2JTg'))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "stem/descriptor/remote.py", line 311, in __iter__
for desc in self.run(True):
File "stem/descriptor/remote.py", line 300, in run
for desc in self._results:
File "stem/descriptor/__init__.py", line 154, in parse_file
import stem.descriptor.server_descriptor
RuntimeError: cannot unmarshal code objects in restricted execution mode
Joy. After much head scratching and forum reading it sounds like there's
something magical about 'from' imports so switching the descriptor modules to
that, and moving the __init__ imports to the end. I'm not entirely clear on the
magic going on here, but it's eliminated the errors.
---
stem/descriptor/__init__.py | 15 ++++++------
stem/descriptor/extrainfo_descriptor.py | 18 +++++++++-----
stem/descriptor/microdescriptor.py | 13 ++++++----
stem/descriptor/networkstatus.py | 40 ++++++++++++++++++-------------
stem/descriptor/router_status_entry.py | 16 +++++++++----
stem/descriptor/server_descriptor.py | 26 ++++++++++++--------
test/integ/descriptor/remote.py | 4 ----
test/unit/descriptor/remote.py | 7 ------
8 files changed, 78 insertions(+), 61 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index f1fdee4..e3b5a8b 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -151,11 +151,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
return
- import stem.descriptor.server_descriptor
- import stem.descriptor.extrainfo_descriptor
- import stem.descriptor.networkstatus
- import stem.descriptor.microdescriptor
-
# The tor descriptor specifications do not provide a reliable method for
# identifying a descriptor file's type and version so we need to guess
# based on its filename. Metrics descriptors, however, can be identified
@@ -217,9 +212,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler):
# Parses descriptor files from metrics, yielding individual descriptors. This
# throws a TypeError if the descriptor_type or version isn't recognized.
- import stem.descriptor.server_descriptor
- import stem.descriptor.extrainfo_descriptor
- import stem.descriptor.networkstatus
if descriptor_type == "server-descriptor" and major_version == 1:
for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate):
@@ -541,3 +533,10 @@ def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
return entries, extra_entries
else:
return entries
+
+# importing at the end to avoid circular dependencies on our Descriptor class
+
+import stem.descriptor.server_descriptor
+import stem.descriptor.extrainfo_descriptor
+import stem.descriptor.networkstatus
+import stem.descriptor.microdescriptor
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index e9aea30..fac0991 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -72,11 +72,17 @@ import datetime
import hashlib
import re
-import stem.descriptor
import stem.util.connection
import stem.util.enum
import stem.util.str_tools
+from stem.descriptor import (
+ PGP_BLOCK_END,
+ Descriptor,
+ _read_until_keywords,
+ _get_descriptor_components,
+)
+
# known statuses for dirreq-v2-resp and dirreq-v3-resp...
DirResponse = stem.util.enum.Enum(
("OK", "ok"),
@@ -156,11 +162,11 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
"""
while True:
- extrainfo_content = stem.descriptor._read_until_keywords("router-signature", descriptor_file)
+ extrainfo_content = _read_until_keywords("router-signature", descriptor_file)
# we've reached the 'router-signature', now include the pgp style block
- block_end_prefix = stem.descriptor.PGP_BLOCK_END.split(' ', 1)[0]
- extrainfo_content += stem.descriptor._read_until_keywords(block_end_prefix, descriptor_file, True)
+ block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+ extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if extrainfo_content:
if is_bridge:
@@ -205,7 +211,7 @@ def _parse_timestamp_and_interval(keyword, content):
raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
-class ExtraInfoDescriptor(stem.descriptor.Descriptor):
+class ExtraInfoDescriptor(Descriptor):
"""
Extra-info descriptor document.
@@ -400,7 +406,7 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
self._unrecognized_lines = []
- entries = stem.descriptor._get_descriptor_components(raw_contents, validate)
+ entries = _get_descriptor_components(raw_contents, validate)
if validate:
for keyword in self._required_fields():
diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index 499e170..5834e18 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -66,10 +66,15 @@ Doing the same is trivial with server descriptors...
import hashlib
-import stem.descriptor
import stem.descriptor.router_status_entry
import stem.exit_policy
+from stem.descriptor import (
+ Descriptor,
+ _get_descriptor_components,
+ _read_until_keywords,
+)
+
REQUIRED_FIELDS = (
"onion-key",
)
@@ -99,7 +104,7 @@ def _parse_file(descriptor_file, validate = True):
"""
while True:
- annotations = stem.descriptor._read_until_keywords("onion-key", descriptor_file)
+ annotations = _read_until_keywords("onion-key", descriptor_file)
# read until we reach an annotation or onion-key line
descriptor_lines = []
@@ -136,7 +141,7 @@ def _parse_file(descriptor_file, validate = True):
break # done parsing descriptors
-class Microdescriptor(stem.descriptor.Descriptor):
+class Microdescriptor(Descriptor):
"""
Microdescriptor (`descriptor specification
<https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)
@@ -174,7 +179,7 @@ class Microdescriptor(stem.descriptor.Descriptor):
self._annotation_lines = annotations if annotations else []
self._annotation_dict = None # cached breakdown of key/value mappings
- entries = stem.descriptor._get_descriptor_components(raw_contents, validate)
+ entries = _get_descriptor_components(raw_contents, validate)
self._parse(entries, validate)
if validate:
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index f65c7dc..baf7f0a 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -49,12 +49,18 @@ For more information see :func:`~stem.descriptor.__init__.DocumentHandler`...
import datetime
import io
-import stem.descriptor
import stem.descriptor.router_status_entry
import stem.util.str_tools
import stem.util.tor_tools
import stem.version
+from stem.descriptor import (
+ Descriptor,
+ DocumentHandler,
+ _get_descriptor_components,
+ _read_until_keywords,
+)
+
# Version 2 network status document fields, tuples of the form...
# (keyword, is_mandatory)
@@ -138,7 +144,7 @@ KEY_CERTIFICATE_PARAMS = (
)
-def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = DocumentHandler.ENTRIES):
"""
Parses a network status and iterates over the RouterStatusEntry in it. The
document that these instances reference have an empty 'routers' attribute to
@@ -180,24 +186,24 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
else:
raise ValueError("Document type %i isn't recognized (only able to parse v2, v3, and bridge)" % document_type)
- if document_handler == stem.descriptor.DocumentHandler.DOCUMENT:
+ if document_handler == DocumentHandler.DOCUMENT:
yield document_type(document_file.read(), validate)
return
# getting the document without the routers section
- header = stem.descriptor._read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
+ header = _read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
routers_start = document_file.tell()
- stem.descriptor._read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
+ _read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
routers_end = document_file.tell()
footer = document_file.readlines()
document_content = bytes.join(b"", header + footer)
- if document_handler == stem.descriptor.DocumentHandler.BARE_DOCUMENT:
+ if document_handler == DocumentHandler.BARE_DOCUMENT:
yield document_type(document_content, validate)
- elif document_handler == stem.descriptor.DocumentHandler.ENTRIES:
+ elif document_handler == DocumentHandler.ENTRIES:
desc_iterator = stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
@@ -214,7 +220,7 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
raise ValueError("Unrecognized document_handler: %s" % document_handler)
-class NetworkStatusDocument(stem.descriptor.Descriptor):
+class NetworkStatusDocument(Descriptor):
"""
Common parent for network status documents.
"""
@@ -281,7 +287,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
# deprecated descriptor type - patches welcome if you want those checks.
document_file = io.BytesIO(raw_content)
- document_content = bytes.join(b"", stem.descriptor._read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
+ document_content = bytes.join(b"", _read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
router_iter = stem.descriptor.router_status_entry._parse_file(
document_file,
@@ -297,7 +303,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
document_content += b"\n" + document_file.read()
document_content = stem.util.str_tools._to_unicode(document_content)
- entries = stem.descriptor._get_descriptor_components(document_content, validate)
+ entries = _get_descriptor_components(document_content, validate)
if validate:
self._check_constraints(entries)
@@ -556,9 +562,9 @@ class _DocumentHeader(object):
self._unrecognized_lines = []
- content = bytes.join(b"", stem.descriptor._read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
+ content = bytes.join(b"", _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
content = stem.util.str_tools._to_unicode(content)
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
self._parse(entries, validate)
# doing this validation afterward so we know our 'is_consensus' and
@@ -792,7 +798,7 @@ class _DocumentFooter(object):
if not content:
return # footer is optional and there's nothing to parse
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
self._parse(entries, validate, header)
if validate:
@@ -948,7 +954,7 @@ def _parse_int_mappings(keyword, value, validate):
return results
-class DirectoryAuthority(stem.descriptor.Descriptor):
+class DirectoryAuthority(Descriptor):
"""
Directory authority information obtained from a v3 network status document.
@@ -1034,7 +1040,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor):
else:
key_cert_content = None
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
if validate and 'dir-source' != entries.keys()[0]:
raise ValueError("Authority entries are expected to start with a 'dir-source' line:\n%s" % (content))
@@ -1168,7 +1174,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor):
return self._compare(other, lambda s, o: s <= o)
-class KeyCertificate(stem.descriptor.Descriptor):
+class KeyCertificate(Descriptor):
"""
Directory key certificate for a v3 network status document.
@@ -1216,7 +1222,7 @@ class KeyCertificate(stem.descriptor.Descriptor):
:raises: **ValueError** if a validity check fails
"""
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
if validate:
if 'dir-key-certificate-version' != entries.keys()[0]:
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index 076baab..80b6623 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -23,10 +23,16 @@ import base64
import binascii
import datetime
-import stem.descriptor
import stem.exit_policy
import stem.util.str_tools
+from stem.descriptor import (
+ KEYWORD_LINE,
+ Descriptor,
+ _get_descriptor_components,
+ _read_until_keywords,
+)
+
def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
"""
@@ -64,7 +70,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start
# check if we're starting at the end of the section (ie, there's no entries to read)
if section_end_keywords:
first_keyword = None
- line_match = stem.descriptor.KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
+ line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
if line_match:
first_keyword = line_match.groups()[0]
@@ -75,7 +81,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start
return
while end_position is None or document_file.tell() < end_position:
- desc_lines, ending_keyword = stem.descriptor._read_until_keywords(
+ desc_lines, ending_keyword = _read_until_keywords(
(entry_keyword,) + section_end_keywords,
document_file,
ignore_first = True,
@@ -95,7 +101,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start
break
-class RouterStatusEntry(stem.descriptor.Descriptor):
+class RouterStatusEntry(Descriptor):
"""
Information about an individual router stored within a network status
document. This is the common parent for concrete status entry types.
@@ -147,7 +153,7 @@ class RouterStatusEntry(stem.descriptor.Descriptor):
self._unrecognized_lines = []
- entries = stem.descriptor._get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(content, validate)
if validate:
self._check_constraints(entries)
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index d23f41e..39d4645 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -34,7 +34,6 @@ import datetime
import hashlib
import re
-import stem.descriptor
import stem.descriptor.extrainfo_descriptor
import stem.exit_policy
import stem.prereq
@@ -45,6 +44,14 @@ import stem.version
from stem.util import log
+from stem.descriptor import (
+ PGP_BLOCK_END,
+ Descriptor,
+ _get_bytes_field,
+ _get_descriptor_components,
+ _read_until_keywords,
+)
+
# relay descriptors must have exactly one of the following
REQUIRED_FIELDS = (
"router",
@@ -118,12 +125,12 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
# to the caller).
while True:
- annotations = stem.descriptor._read_until_keywords("router", descriptor_file)
- descriptor_content = stem.descriptor._read_until_keywords("router-signature", descriptor_file)
+ annotations = _read_until_keywords("router", descriptor_file)
+ descriptor_content = _read_until_keywords("router-signature", descriptor_file)
# we've reached the 'router-signature', now include the pgp style block
- block_end_prefix = stem.descriptor.PGP_BLOCK_END.split(' ', 1)[0]
- descriptor_content += stem.descriptor._read_until_keywords(block_end_prefix, descriptor_file, True)
+ block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+ descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if descriptor_content:
# strip newlines from annotations
@@ -142,7 +149,7 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
break # done parsing descriptors
-class ServerDescriptor(stem.descriptor.Descriptor):
+class ServerDescriptor(Descriptor):
"""
Common parent for server descriptors.
@@ -216,8 +223,8 @@ class ServerDescriptor(stem.descriptor.Descriptor):
# Only a few things can be arbitrary bytes according to the dir-spec, so
# parsing them separately.
- self.platform = stem.descriptor._get_bytes_field("platform", raw_contents)
- self.contact = stem.descriptor._get_bytes_field("contact", raw_contents)
+ self.platform = _get_bytes_field("platform", raw_contents)
+ self.contact = _get_bytes_field("contact", raw_contents)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
@@ -272,8 +279,7 @@ class ServerDescriptor(stem.descriptor.Descriptor):
# influences the resulting exit policy, but for everything else the order
# does not matter so breaking it into key / value pairs.
- entries, policy = \
- stem.descriptor._get_descriptor_components(raw_contents, validate, ("accept", "reject"))
+ entries, policy = _get_descriptor_components(raw_contents, validate, ("accept", "reject"))
self.exit_policy = stem.exit_policy.ExitPolicy(*policy)
self._parse(entries, validate)
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index 7c45118..b3c549d 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -11,10 +11,6 @@ import stem.descriptor.router_status_entry
import stem.descriptor.server_descriptor
import test.runner
-# Required to prevent unmarshal error when running this test alone.
-
-import stem.descriptor.networkstatus
-
class TestDescriptorDownloader(unittest.TestCase):
def test_using_authorities(self):
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index fb2e3f0..3ea303b 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -10,13 +10,6 @@ import stem.descriptor.remote
from mock import patch
-# The following isn't used by this directly, but we're still importing it due
-# to a screwy aspect of how mock works. If patched() results in an import that
-# we haven't done before then we can fail with a RuntimeError. In practice this
-# just arises if we run this unit test on its own.
-
-import stem.descriptor.networkstatus
-
# Output from requesting moria1's descriptor from itself...
# % curl http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020C…
1
0
22 Jul '13
commit 4d122b1a417fe318f0bb3d169688d25f3e749754
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jul 21 17:10:26 2013 -0700
Implementing a get_key_certificates() method
Method for fetching key certificates for the authorities. This included a
little work so parse_file() could return multiple certificates when they're
concatenated together.
---
stem/descriptor/__init__.py | 3 ++-
stem/descriptor/networkstatus.py | 31 +++++++++++++++++++++++++++++++
stem/descriptor/remote.py | 36 ++++++++++++++++++++++++++++++++++++
test/integ/descriptor/remote.py | 30 ++++++++++++++++++++++++++++++
4 files changed, 99 insertions(+), 1 deletion(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index e3b5a8b..14b29d1 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -237,7 +237,8 @@ def _parse_metrics_file(descriptor_type, major_version, minor_version, descripto
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler):
yield desc
elif descriptor_type == "dir-key-certificate-3" and major_version == 1:
- yield stem.descriptor.networkstatus.KeyCertificate(descriptor_file.read(), validate = validate)
+ for desc in stem.descriptor.networkstatus._parse_file_key_certs(descriptor_file, validate = validate):
+ yield desc
elif descriptor_type in ("network-status-consensus-3", "network-status-vote-3") and major_version == 1:
document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index baf7f0a..ec21304 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -55,6 +55,7 @@ import stem.util.tor_tools
import stem.version
from stem.descriptor import (
+ PGP_BLOCK_END,
Descriptor,
DocumentHandler,
_get_descriptor_components,
@@ -220,6 +221,36 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
raise ValueError("Unrecognized document_handler: %s" % document_handler)
+def _parse_file_key_certs(certificate_file, validate = True):
+ """
+ Parses a file containing one or more authority key certificates.
+
+ :param file certificate_file: file with key certificates
+ :param bool validate: checks the validity of the certificate's contents if
+ **True**, skips these checks otherwise
+
+ :returns: iterator for :class:`stem.descriptor.networkstatus.KeyCertificate`
+ instances in the file
+
+ :raises:
+ * **ValueError** if the key certificate content is invalid and validate is
+ **True**
+ * **IOError** if the file can't be read
+ """
+
+ while True:
+ keycert_content = _read_until_keywords("dir-key-certification", certificate_file)
+
+ # we've reached the 'dir-key-certification', now include the pgp style block
+ block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+ keycert_content += _read_until_keywords(block_end_prefix, certificate_file, True)
+
+ if keycert_content:
+ yield stem.descriptor.networkstatus.KeyCertificate(bytes.join(b"", keycert_content), validate = validate)
+ else:
+ break # done parsing file
+
+
class NetworkStatusDocument(Descriptor):
"""
Common parent for network status documents.
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index a009078..0aabd74 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -52,6 +52,7 @@ itself...
|- get_extrainfo_descriptors - provides present :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
|- get_microdescriptors - provides present :class:`~stem.descriptor.microdescriptor.Microdescriptor`
|- get_consensus - provides present :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`
+ |- get_key_certificates - provides present :class:`~stem.descriptor.networkstatus.KeyCertificate`
+- query - request an arbitrary descriptor resource
.. data:: MAX_DESCRIPTOR_BATCH_SIZE
@@ -118,6 +119,8 @@ def _guess_descriptor_type(resource):
return 'microdescriptor 1.0'
elif resource.startswith('/tor/status-vote/'):
return 'network-status-consensus-3 1.0'
+ elif resource.startswith('/tor/keys/'):
+ return 'dir-key-certificate-3 1.0'
else:
raise ValueError("Unable to determine the descriptor type for '%s'" % resource)
@@ -517,6 +520,39 @@ class DescriptorDownloader(object):
return self.query(resource + '.z', document_handler = document_handler, **query_args)
+ def get_key_certificates(self, authority_v3idents = None, **query_args):
+ """
+ Provides the key certificates for authorities with the given fingerprints.
+ If no fingerprints are provided then this returns all present key
+ certificates.
+
+ :param str,list authority_v3idents: fingerprint or list of fingerprints of the
+ authority keys, see `'v3ident' in tor's config.c
+ <https://gitweb.torproject.org/tor.git/blob/f631b73:/src/or/config.c#l816>`_
+ for the values.
+ :param query_args: additional arguments for the
+ :class:`~stem.descriptor.remote.Query` constructor
+
+ :returns: :class:`~stem.descriptor.remote.Query` for the key certificates
+
+ :raises: **ValueError** if we request more than 96 key certificates by
+ their identity fingerprints (this is due to a limit on the url length by
+ squid proxies).
+ """
+
+ resource = '/tor/keys/all.z'
+
+ if isinstance(authority_v3idents, str):
+ authority_v3idents = [authority_v3idents]
+
+ if authority_v3idents:
+ if len(authority_v3idents) > MAX_DESCRIPTOR_BATCH_SIZE:
+ raise ValueError("Unable to request more than %i key certificates at a time by their identity fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
+
+ resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)
+
+ return self.query(resource, **query_args)
+
def query(self, resource, **query_args):
"""
Issues a request for the given resource.
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index b3c549d..d11a4ae 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -6,6 +6,7 @@ import unittest
import stem.descriptor.extrainfo_descriptor
import stem.descriptor.microdescriptor
+import stem.descriptor.networkstatus
import stem.descriptor.remote
import stem.descriptor.router_status_entry
import stem.descriptor.server_descriptor
@@ -178,3 +179,32 @@ class TestDescriptorDownloader(unittest.TestCase):
consensus = list(consensus_query)
self.assertTrue(len(consensus) > 50)
self.assertTrue(isinstance(consensus[0], stem.descriptor.router_status_entry.RouterStatusEntryV3))
+
+ def test_get_key_certificates(self):
+ """
+ Exercises the downloader's get_key_certificates() method.
+ """
+
+ if test.runner.require_online(self):
+ return
+ elif test.runner.only_run_once(self, "test_get_key_certificates"):
+ return
+
+ downloader = stem.descriptor.remote.DescriptorDownloader()
+
+ single_query = downloader.get_key_certificates('D586D18309DED4CD6D57C18FDB97EFA96D330566')
+
+ multiple_query = downloader.get_key_certificates([
+ 'D586D18309DED4CD6D57C18FDB97EFA96D330566',
+ '14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4',
+ ])
+
+ single_query.run()
+ multiple_query.run()
+
+ single_query_results = list(single_query)
+ self.assertEqual(1, len(single_query_results))
+ self.assertEqual('D586D18309DED4CD6D57C18FDB97EFA96D330566', single_query_results[0].fingerprint)
+ self.assertTrue(isinstance(single_query_results[0], stem.descriptor.networkstatus.KeyCertificate))
+
+ self.assertEqual(2, len(list(multiple_query)))
1
0