commit d5b3ec93f44de01b21b27264e761fe8f09ec8012 Author: Damian Johnson atagar@torproject.org Date: Sun Jul 21 10:45:56 2013 -0700
Fixing "cannot unmarshal code" errors
Damnit python, your import scheme is stupidly confusing.
The descriptor's __init__ module has a circular dependency with its contents. This is because the parse_file() function calls the constituent modules, while those modules need the Descriptor class from __init__.
So far so good. Only trouble is that python's support for circular dependencies sucks. To address this I did lazy imports in __init__, so we imported within the parse_file() function.
On the surface this seemed to work. All the tests certainly passed. The trouble is that this style of python import is buggy as hell. Turns out that lazy imports leave the module in question in a unexecutable state so this *only* works if you've also imported the module another time during the interpretor execution. Our tests did this, hence passing tests.
I first encuntered "cannot unmarshal code" while writing the remote descritpor tests (both unit and integ). I was content to hack around this with superfluous import statements while this only manifested within the tests, but now I'm seeing it during general usage too...
>>> from stem.descriptor.remote import DescriptorDownloader >>> list(DescriptorDownloader().get_microdescriptors('jzcx+1fHsi47Tu+vQIcyItgn4lKs6aKnFshQ0lZ2JTg')) Traceback (most recent call last): File "<stdin>", line 1, in <module> File "stem/descriptor/remote.py", line 311, in __iter__ for desc in self.run(True): File "stem/descriptor/remote.py", line 300, in run for desc in self._results: File "stem/descriptor/__init__.py", line 154, in parse_file import stem.descriptor.server_descriptor RuntimeError: cannot unmarshal code objects in restricted execution mode
Joy. After much head scratching and forum reading it sounds like there's something magical about 'from' imports so switching the descriptor modules to that, and moving the __init__ imports to the end. I'm not entirely clear on the magic going on here, but its elmiminated the errors. --- stem/descriptor/__init__.py | 15 ++++++------ stem/descriptor/extrainfo_descriptor.py | 18 +++++++++----- stem/descriptor/microdescriptor.py | 13 ++++++---- stem/descriptor/networkstatus.py | 40 ++++++++++++++++++------------- stem/descriptor/router_status_entry.py | 16 +++++++++---- stem/descriptor/server_descriptor.py | 26 ++++++++++++-------- test/integ/descriptor/remote.py | 4 ---- test/unit/descriptor/remote.py | 7 ------ 8 files changed, 78 insertions(+), 61 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index f1fdee4..e3b5a8b 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -151,11 +151,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
return
- import stem.descriptor.server_descriptor - import stem.descriptor.extrainfo_descriptor - import stem.descriptor.networkstatus - import stem.descriptor.microdescriptor - # The tor descriptor specifications do not provide a reliable method for # identifying a descriptor file's type and version so we need to guess # based on its filename. Metrics descriptors, however, can be identified @@ -217,9 +212,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler): # Parses descriptor files from metrics, yielding individual descriptors. This # throws a TypeError if the descriptor_type or version isn't recognized. - import stem.descriptor.server_descriptor - import stem.descriptor.extrainfo_descriptor - import stem.descriptor.networkstatus
if descriptor_type == "server-descriptor" and major_version == 1: for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate): @@ -541,3 +533,10 @@ def _get_descriptor_components(raw_contents, validate, extra_keywords = ()): return entries, extra_entries else: return entries + +# importing at the end to avoid circular dependencies on our Descriptor class + +import stem.descriptor.server_descriptor +import stem.descriptor.extrainfo_descriptor +import stem.descriptor.networkstatus +import stem.descriptor.microdescriptor diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index e9aea30..fac0991 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -72,11 +72,17 @@ import datetime import hashlib import re
-import stem.descriptor import stem.util.connection import stem.util.enum import stem.util.str_tools
+from stem.descriptor import ( + PGP_BLOCK_END, + Descriptor, + _read_until_keywords, + _get_descriptor_components, +) + # known statuses for dirreq-v2-resp and dirreq-v3-resp... DirResponse = stem.util.enum.Enum( ("OK", "ok"), @@ -156,11 +162,11 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True): """
while True: - extrainfo_content = stem.descriptor._read_until_keywords("router-signature", descriptor_file) + extrainfo_content = _read_until_keywords("router-signature", descriptor_file)
# we've reached the 'router-signature', now include the pgp style block - block_end_prefix = stem.descriptor.PGP_BLOCK_END.split(' ', 1)[0] - extrainfo_content += stem.descriptor._read_until_keywords(block_end_prefix, descriptor_file, True) + block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0] + extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if extrainfo_content: if is_bridge: @@ -205,7 +211,7 @@ def _parse_timestamp_and_interval(keyword, content): raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
-class ExtraInfoDescriptor(stem.descriptor.Descriptor): +class ExtraInfoDescriptor(Descriptor): """ Extra-info descriptor document.
@@ -400,7 +406,7 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
self._unrecognized_lines = []
- entries = stem.descriptor._get_descriptor_components(raw_contents, validate) + entries = _get_descriptor_components(raw_contents, validate)
if validate: for keyword in self._required_fields(): diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py index 499e170..5834e18 100644 --- a/stem/descriptor/microdescriptor.py +++ b/stem/descriptor/microdescriptor.py @@ -66,10 +66,15 @@ Doing the same is trivial with server descriptors...
import hashlib
-import stem.descriptor import stem.descriptor.router_status_entry import stem.exit_policy
+from stem.descriptor import ( + Descriptor, + _get_descriptor_components, + _read_until_keywords, +) + REQUIRED_FIELDS = ( "onion-key", ) @@ -99,7 +104,7 @@ def _parse_file(descriptor_file, validate = True): """
while True: - annotations = stem.descriptor._read_until_keywords("onion-key", descriptor_file) + annotations = _read_until_keywords("onion-key", descriptor_file)
# read until we reach an annotation or onion-key line descriptor_lines = [] @@ -136,7 +141,7 @@ def _parse_file(descriptor_file, validate = True): break # done parsing descriptors
-class Microdescriptor(stem.descriptor.Descriptor): +class Microdescriptor(Descriptor): """ Microdescriptor (`descriptor specification https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt`_) @@ -174,7 +179,7 @@ class Microdescriptor(stem.descriptor.Descriptor): self._annotation_lines = annotations if annotations else [] self._annotation_dict = None # cached breakdown of key/value mappings
- entries = stem.descriptor._get_descriptor_components(raw_contents, validate) + entries = _get_descriptor_components(raw_contents, validate) self._parse(entries, validate)
if validate: diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py index f65c7dc..baf7f0a 100644 --- a/stem/descriptor/networkstatus.py +++ b/stem/descriptor/networkstatus.py @@ -49,12 +49,18 @@ For more information see :func:`~stem.descriptor.__init__.DocumentHandler`... import datetime import io
-import stem.descriptor import stem.descriptor.router_status_entry import stem.util.str_tools import stem.util.tor_tools import stem.version
+from stem.descriptor import ( + Descriptor, + DocumentHandler, + _get_descriptor_components, + _read_until_keywords, +) + # Version 2 network status document fields, tuples of the form... # (keyword, is_mandatory)
@@ -138,7 +144,7 @@ KEY_CERTIFICATE_PARAMS = ( )
-def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES): +def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = DocumentHandler.ENTRIES): """ Parses a network status and iterates over the RouterStatusEntry in it. The document that these instances reference have an empty 'routers' attribute to @@ -180,24 +186,24 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde else: raise ValueError("Document type %i isn't recognized (only able to parse v2, v3, and bridge)" % document_type)
- if document_handler == stem.descriptor.DocumentHandler.DOCUMENT: + if document_handler == DocumentHandler.DOCUMENT: yield document_type(document_file.read(), validate) return
# getting the document without the routers section
- header = stem.descriptor._read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file) + header = _read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
routers_start = document_file.tell() - stem.descriptor._read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True) + _read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True) routers_end = document_file.tell()
footer = document_file.readlines() document_content = bytes.join(b"", header + footer)
- if document_handler == stem.descriptor.DocumentHandler.BARE_DOCUMENT: + if document_handler == DocumentHandler.BARE_DOCUMENT: yield document_type(document_content, validate) - elif document_handler == stem.descriptor.DocumentHandler.ENTRIES: + elif document_handler == DocumentHandler.ENTRIES: desc_iterator = stem.descriptor.router_status_entry._parse_file( document_file, validate, @@ -214,7 +220,7 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde raise ValueError("Unrecognized document_handler: %s" % document_handler)
-class NetworkStatusDocument(stem.descriptor.Descriptor): +class NetworkStatusDocument(Descriptor): """ Common parent for network status documents. """ @@ -281,7 +287,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument): # deprecated descriptor type - patches welcome if you want those checks.
document_file = io.BytesIO(raw_content) - document_content = bytes.join(b"", stem.descriptor._read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file)) + document_content = bytes.join(b"", _read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
router_iter = stem.descriptor.router_status_entry._parse_file( document_file, @@ -297,7 +303,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument): document_content += b"\n" + document_file.read() document_content = stem.util.str_tools._to_unicode(document_content)
- entries = stem.descriptor._get_descriptor_components(document_content, validate) + entries = _get_descriptor_components(document_content, validate)
if validate: self._check_constraints(entries) @@ -556,9 +562,9 @@ class _DocumentHeader(object):
self._unrecognized_lines = []
- content = bytes.join(b"", stem.descriptor._read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)) + content = bytes.join(b"", _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)) content = stem.util.str_tools._to_unicode(content) - entries = stem.descriptor._get_descriptor_components(content, validate) + entries = _get_descriptor_components(content, validate) self._parse(entries, validate)
# doing this validation afterward so we know our 'is_consensus' and @@ -792,7 +798,7 @@ class _DocumentFooter(object): if not content: return # footer is optional and there's nothing to parse
- entries = stem.descriptor._get_descriptor_components(content, validate) + entries = _get_descriptor_components(content, validate) self._parse(entries, validate, header)
if validate: @@ -948,7 +954,7 @@ def _parse_int_mappings(keyword, value, validate): return results
-class DirectoryAuthority(stem.descriptor.Descriptor): +class DirectoryAuthority(Descriptor): """ Directory authority information obtained from a v3 network status document.
@@ -1034,7 +1040,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor): else: key_cert_content = None
- entries = stem.descriptor._get_descriptor_components(content, validate) + entries = _get_descriptor_components(content, validate)
if validate and 'dir-source' != entries.keys()[0]: raise ValueError("Authority entries are expected to start with a 'dir-source' line:\n%s" % (content)) @@ -1168,7 +1174,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor): return self._compare(other, lambda s, o: s <= o)
-class KeyCertificate(stem.descriptor.Descriptor): +class KeyCertificate(Descriptor): """ Directory key certificate for a v3 network status document.
@@ -1216,7 +1222,7 @@ class KeyCertificate(stem.descriptor.Descriptor): :raises: **ValueError** if a validity check fails """
- entries = stem.descriptor._get_descriptor_components(content, validate) + entries = _get_descriptor_components(content, validate)
if validate: if 'dir-key-certificate-version' != entries.keys()[0]: diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py index 076baab..80b6623 100644 --- a/stem/descriptor/router_status_entry.py +++ b/stem/descriptor/router_status_entry.py @@ -23,10 +23,16 @@ import base64 import binascii import datetime
-import stem.descriptor import stem.exit_policy import stem.util.str_tools
+from stem.descriptor import ( + KEYWORD_LINE, + Descriptor, + _get_descriptor_components, + _read_until_keywords, +) +
def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start_position = None, end_position = None, section_end_keywords = (), extra_args = ()): """ @@ -64,7 +70,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start # check if we're starting at the end of the section (ie, there's no entries to read) if section_end_keywords: first_keyword = None - line_match = stem.descriptor.KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline())) + line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
if line_match: first_keyword = line_match.groups()[0] @@ -75,7 +81,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start return
while end_position is None or document_file.tell() < end_position: - desc_lines, ending_keyword = stem.descriptor._read_until_keywords( + desc_lines, ending_keyword = _read_until_keywords( (entry_keyword,) + section_end_keywords, document_file, ignore_first = True, @@ -95,7 +101,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start break
-class RouterStatusEntry(stem.descriptor.Descriptor): +class RouterStatusEntry(Descriptor): """ Information about an individual router stored within a network status document. This is the common parent for concrete status entry types. @@ -147,7 +153,7 @@ class RouterStatusEntry(stem.descriptor.Descriptor):
self._unrecognized_lines = []
- entries = stem.descriptor._get_descriptor_components(content, validate) + entries = _get_descriptor_components(content, validate)
if validate: self._check_constraints(entries) diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py index d23f41e..39d4645 100644 --- a/stem/descriptor/server_descriptor.py +++ b/stem/descriptor/server_descriptor.py @@ -34,7 +34,6 @@ import datetime import hashlib import re
-import stem.descriptor import stem.descriptor.extrainfo_descriptor import stem.exit_policy import stem.prereq @@ -45,6 +44,14 @@ import stem.version
from stem.util import log
+from stem.descriptor import ( + PGP_BLOCK_END, + Descriptor, + _get_bytes_field, + _get_descriptor_components, + _read_until_keywords, +) + # relay descriptors must have exactly one of the following REQUIRED_FIELDS = ( "router", @@ -118,12 +125,12 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True): # to the caller).
while True: - annotations = stem.descriptor._read_until_keywords("router", descriptor_file) - descriptor_content = stem.descriptor._read_until_keywords("router-signature", descriptor_file) + annotations = _read_until_keywords("router", descriptor_file) + descriptor_content = _read_until_keywords("router-signature", descriptor_file)
# we've reached the 'router-signature', now include the pgp style block - block_end_prefix = stem.descriptor.PGP_BLOCK_END.split(' ', 1)[0] - descriptor_content += stem.descriptor._read_until_keywords(block_end_prefix, descriptor_file, True) + block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0] + descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if descriptor_content: # strip newlines from annotations @@ -142,7 +149,7 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True): break # done parsing descriptors
-class ServerDescriptor(stem.descriptor.Descriptor): +class ServerDescriptor(Descriptor): """ Common parent for server descriptors.
@@ -216,8 +223,8 @@ class ServerDescriptor(stem.descriptor.Descriptor): # Only a few things can be arbitrary bytes according to the dir-spec, so # parsing them separately.
- self.platform = stem.descriptor._get_bytes_field("platform", raw_contents) - self.contact = stem.descriptor._get_bytes_field("contact", raw_contents) + self.platform = _get_bytes_field("platform", raw_contents) + self.contact = _get_bytes_field("contact", raw_contents)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
@@ -272,8 +279,7 @@ class ServerDescriptor(stem.descriptor.Descriptor): # influences the resulting exit policy, but for everything else the order # does not matter so breaking it into key / value pairs.
- entries, policy = \ - stem.descriptor._get_descriptor_components(raw_contents, validate, ("accept", "reject")) + entries, policy = _get_descriptor_components(raw_contents, validate, ("accept", "reject"))
self.exit_policy = stem.exit_policy.ExitPolicy(*policy) self._parse(entries, validate) diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py index 7c45118..b3c549d 100644 --- a/test/integ/descriptor/remote.py +++ b/test/integ/descriptor/remote.py @@ -11,10 +11,6 @@ import stem.descriptor.router_status_entry import stem.descriptor.server_descriptor import test.runner
-# Required to prevent unmarshal error when running this test alone. - -import stem.descriptor.networkstatus -
class TestDescriptorDownloader(unittest.TestCase): def test_using_authorities(self): diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py index fb2e3f0..3ea303b 100644 --- a/test/unit/descriptor/remote.py +++ b/test/unit/descriptor/remote.py @@ -10,13 +10,6 @@ import stem.descriptor.remote
from mock import patch
-# The following isn't used by this directly, but we're still importing it due -# to a screwy aspect of how mock works. If patched() results in an import that -# we haven't done before then we can fail with a RuntimeError. In practice this -# just arises if we run this unit test on its own. - -import stem.descriptor.networkstatus - # Output from requesting moria1's descriptor from itself... # % curl http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE...