[tor-commits] [stem/master] Fixing "cannot unmarshal code" errors

atagar at torproject.org atagar at torproject.org
Mon Jul 22 03:10:17 UTC 2013


commit d5b3ec93f44de01b21b27264e761fe8f09ec8012
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Jul 21 10:45:56 2013 -0700

    Fixing "cannot unmarshal code" errors
    
    Damnit python, your import scheme is stupidly confusing.
    
    The descriptor's __init__ module has a circular dependency with its contents.
    This is because the parse_file() function calls the constituent modules, while
    those modules need the Descriptor class from __init__.
    
    So far so good. The only trouble is that python's support for circular
    dependencies sucks. To address this I used lazy imports in __init__, importing
    the constituent modules within the parse_file() function itself.
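
    In sketch form the arrangement was roughly the following (heavily trimmed,
    just to show the shape of the cycle)...

      # stem/descriptor/__init__.py

      class Descriptor(object):
        def __init__(self, contents):
          self._raw_contents = contents

      def parse_file(descriptor_file, descriptor_type = None):
        # lazy import so loading this module doesn't pull in its submodules
        import stem.descriptor.server_descriptor

        for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file):
          yield desc

      # stem/descriptor/server_descriptor.py

      import stem.descriptor

      class ServerDescriptor(stem.descriptor.Descriptor):
        pass

      def _parse_file(descriptor_file):
        # real parsing is far more involved, this just shows the dependency
        yield ServerDescriptor(descriptor_file.read())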
    
    On the surface this seemed to work. All the tests certainly passed. The trouble
    is that this style of python import is buggy as hell. Turns out that lazy
    imports leave the module in question in an unexecutable state, so this *only*
    works if you've also imported the module at some other point during the
    interpreter's execution. Our tests did this, hence the passing tests.
    
    I first encountered "cannot unmarshal code" while writing the remote descriptor
    tests (both unit and integ). I was content to hack around it with superfluous
    import statements while it only manifested within the tests, but now I'm
    seeing it during general usage too...
    
    >>> from stem.descriptor.remote import DescriptorDownloader
    >>> list(DescriptorDownloader().get_microdescriptors('jzcx+1fHsi47Tu+vQIcyItgn4lKs6aKnFshQ0lZ2JTg'))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "stem/descriptor/remote.py", line 311, in __iter__
        for desc in self.run(True):
      File "stem/descriptor/remote.py", line 300, in run
        for desc in self._results:
      File "stem/descriptor/__init__.py", line 154, in parse_file
        import stem.descriptor.server_descriptor
    RuntimeError: cannot unmarshal code objects in restricted execution mode
    
    Joy. After much head scratching and forum reading it sounds like there's
    something magical about 'from' imports, so I'm switching the descriptor modules
    to those and moving the __init__ imports to the end. I'm not entirely clear on
    the magic going on here, but it's eliminated the errors.
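
    The new arrangement in sketch form (again trimmed way down from the real
    modules)...

      # stem/descriptor/server_descriptor.py

      from stem.descriptor import Descriptor

      class ServerDescriptor(Descriptor):
        pass

      def _parse_file(descriptor_file):
        yield ServerDescriptor(descriptor_file.read())

      # stem/descriptor/__init__.py

      class Descriptor(object):
        def __init__(self, contents):
          self._raw_contents = contents

      def parse_file(descriptor_file, descriptor_type = None):
        for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file):
          yield desc

      # importing at the end to avoid circular dependencies on our Descriptor class

      import stem.descriptor.server_descriptor

    Best guess at the magic: the 'from' import grabs Descriptor directly from the
    partially initialized package the moment the submodule loads, while __init__
    only reaches into the submodules at runtime, after the bottom-of-file imports
    have fully executed them.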
---
 stem/descriptor/__init__.py             |   15 ++++++------
 stem/descriptor/extrainfo_descriptor.py |   18 +++++++++-----
 stem/descriptor/microdescriptor.py      |   13 ++++++----
 stem/descriptor/networkstatus.py        |   40 ++++++++++++++++++-------------
 stem/descriptor/router_status_entry.py  |   16 +++++++++----
 stem/descriptor/server_descriptor.py    |   26 ++++++++++++--------
 test/integ/descriptor/remote.py         |    4 ----
 test/unit/descriptor/remote.py          |    7 ------
 8 files changed, 78 insertions(+), 61 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index f1fdee4..e3b5a8b 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -151,11 +151,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
 
       return
 
-  import stem.descriptor.server_descriptor
-  import stem.descriptor.extrainfo_descriptor
-  import stem.descriptor.networkstatus
-  import stem.descriptor.microdescriptor
-
   # The tor descriptor specifications do not provide a reliable method for
   # identifying a descriptor file's type and version so we need to guess
   # based on its filename. Metrics descriptors, however, can be identified
@@ -217,9 +212,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
 def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler):
   # Parses descriptor files from metrics, yielding individual descriptors. This
   # throws a TypeError if the descriptor_type or version isn't recognized.
-  import stem.descriptor.server_descriptor
-  import stem.descriptor.extrainfo_descriptor
-  import stem.descriptor.networkstatus
 
   if descriptor_type == "server-descriptor" and major_version == 1:
     for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate):
@@ -541,3 +533,10 @@ def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
     return entries, extra_entries
   else:
     return entries
+
+# importing at the end to avoid circular dependencies on our Descriptor class
+
+import stem.descriptor.server_descriptor
+import stem.descriptor.extrainfo_descriptor
+import stem.descriptor.networkstatus
+import stem.descriptor.microdescriptor
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index e9aea30..fac0991 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -72,11 +72,17 @@ import datetime
 import hashlib
 import re
 
-import stem.descriptor
 import stem.util.connection
 import stem.util.enum
 import stem.util.str_tools
 
+from stem.descriptor import (
+  PGP_BLOCK_END,
+  Descriptor,
+  _read_until_keywords,
+  _get_descriptor_components,
+)
+
 # known statuses for dirreq-v2-resp and dirreq-v3-resp...
 DirResponse = stem.util.enum.Enum(
   ("OK", "ok"),
@@ -156,11 +162,11 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
   """
 
   while True:
-    extrainfo_content = stem.descriptor._read_until_keywords("router-signature", descriptor_file)
+    extrainfo_content = _read_until_keywords("router-signature", descriptor_file)
 
     # we've reached the 'router-signature', now include the pgp style block
-    block_end_prefix = stem.descriptor.PGP_BLOCK_END.split(' ', 1)[0]
-    extrainfo_content += stem.descriptor._read_until_keywords(block_end_prefix, descriptor_file, True)
+    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+    extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
 
     if extrainfo_content:
       if is_bridge:
@@ -205,7 +211,7 @@ def _parse_timestamp_and_interval(keyword, content):
     raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
 
 
-class ExtraInfoDescriptor(stem.descriptor.Descriptor):
+class ExtraInfoDescriptor(Descriptor):
   """
   Extra-info descriptor document.
 
@@ -400,7 +406,7 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
 
     self._unrecognized_lines = []
 
-    entries = stem.descriptor._get_descriptor_components(raw_contents, validate)
+    entries = _get_descriptor_components(raw_contents, validate)
 
     if validate:
       for keyword in self._required_fields():
diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index 499e170..5834e18 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -66,10 +66,15 @@ Doing the same is trivial with server descriptors...
 
 import hashlib
 
-import stem.descriptor
 import stem.descriptor.router_status_entry
 import stem.exit_policy
 
+from stem.descriptor import (
+  Descriptor,
+  _get_descriptor_components,
+  _read_until_keywords,
+)
+
 REQUIRED_FIELDS = (
   "onion-key",
 )
@@ -99,7 +104,7 @@ def _parse_file(descriptor_file, validate = True):
   """
 
   while True:
-    annotations = stem.descriptor._read_until_keywords("onion-key", descriptor_file)
+    annotations = _read_until_keywords("onion-key", descriptor_file)
 
     # read until we reach an annotation or onion-key line
     descriptor_lines = []
@@ -136,7 +141,7 @@ def _parse_file(descriptor_file, validate = True):
       break  # done parsing descriptors
 
 
-class Microdescriptor(stem.descriptor.Descriptor):
+class Microdescriptor(Descriptor):
   """
   Microdescriptor (`descriptor specification
   <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)
@@ -174,7 +179,7 @@ class Microdescriptor(stem.descriptor.Descriptor):
     self._annotation_lines = annotations if annotations else []
     self._annotation_dict = None  # cached breakdown of key/value mappings
 
-    entries = stem.descriptor._get_descriptor_components(raw_contents, validate)
+    entries = _get_descriptor_components(raw_contents, validate)
     self._parse(entries, validate)
 
     if validate:
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index f65c7dc..baf7f0a 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -49,12 +49,18 @@ For more information see :func:`~stem.descriptor.__init__.DocumentHandler`...
 import datetime
 import io
 
-import stem.descriptor
 import stem.descriptor.router_status_entry
 import stem.util.str_tools
 import stem.util.tor_tools
 import stem.version
 
+from stem.descriptor import (
+  Descriptor,
+  DocumentHandler,
+  _get_descriptor_components,
+  _read_until_keywords,
+)
+
 # Version 2 network status document fields, tuples of the form...
 # (keyword, is_mandatory)
 
@@ -138,7 +144,7 @@ KEY_CERTIFICATE_PARAMS = (
 )
 
 
-def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
+def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = DocumentHandler.ENTRIES):
   """
   Parses a network status and iterates over the RouterStatusEntry in it. The
   document that these instances reference have an empty 'routers' attribute to
@@ -180,24 +186,24 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
   else:
     raise ValueError("Document type %i isn't recognized (only able to parse v2, v3, and bridge)" % document_type)
 
-  if document_handler == stem.descriptor.DocumentHandler.DOCUMENT:
+  if document_handler == DocumentHandler.DOCUMENT:
     yield document_type(document_file.read(), validate)
     return
 
   # getting the document without the routers section
 
-  header = stem.descriptor._read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
+  header = _read_until_keywords((ROUTERS_START, FOOTER_START, V2_FOOTER_START), document_file)
 
   routers_start = document_file.tell()
-  stem.descriptor._read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
+  _read_until_keywords((FOOTER_START, V2_FOOTER_START), document_file, skip = True)
   routers_end = document_file.tell()
 
   footer = document_file.readlines()
   document_content = bytes.join(b"", header + footer)
 
-  if document_handler == stem.descriptor.DocumentHandler.BARE_DOCUMENT:
+  if document_handler == DocumentHandler.BARE_DOCUMENT:
     yield document_type(document_content, validate)
-  elif document_handler == stem.descriptor.DocumentHandler.ENTRIES:
+  elif document_handler == DocumentHandler.ENTRIES:
     desc_iterator = stem.descriptor.router_status_entry._parse_file(
       document_file,
       validate,
@@ -214,7 +220,7 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
     raise ValueError("Unrecognized document_handler: %s" % document_handler)
 
 
-class NetworkStatusDocument(stem.descriptor.Descriptor):
+class NetworkStatusDocument(Descriptor):
   """
   Common parent for network status documents.
   """
@@ -281,7 +287,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
     # deprecated descriptor type - patches welcome if you want those checks.
 
     document_file = io.BytesIO(raw_content)
-    document_content = bytes.join(b"", stem.descriptor._read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
+    document_content = bytes.join(b"", _read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
 
     router_iter = stem.descriptor.router_status_entry._parse_file(
       document_file,
@@ -297,7 +303,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
     document_content += b"\n" + document_file.read()
     document_content = stem.util.str_tools._to_unicode(document_content)
 
-    entries = stem.descriptor._get_descriptor_components(document_content, validate)
+    entries = _get_descriptor_components(document_content, validate)
 
     if validate:
       self._check_constraints(entries)
@@ -556,9 +562,9 @@ class _DocumentHeader(object):
 
     self._unrecognized_lines = []
 
-    content = bytes.join(b"", stem.descriptor._read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
+    content = bytes.join(b"", _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
     content = stem.util.str_tools._to_unicode(content)
-    entries = stem.descriptor._get_descriptor_components(content, validate)
+    entries = _get_descriptor_components(content, validate)
     self._parse(entries, validate)
 
     # doing this validation afterward so we know our 'is_consensus' and
@@ -792,7 +798,7 @@ class _DocumentFooter(object):
     if not content:
       return  # footer is optional and there's nothing to parse
 
-    entries = stem.descriptor._get_descriptor_components(content, validate)
+    entries = _get_descriptor_components(content, validate)
     self._parse(entries, validate, header)
 
     if validate:
@@ -948,7 +954,7 @@ def _parse_int_mappings(keyword, value, validate):
   return results
 
 
-class DirectoryAuthority(stem.descriptor.Descriptor):
+class DirectoryAuthority(Descriptor):
   """
   Directory authority information obtained from a v3 network status document.
 
@@ -1034,7 +1040,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor):
     else:
       key_cert_content = None
 
-    entries = stem.descriptor._get_descriptor_components(content, validate)
+    entries = _get_descriptor_components(content, validate)
 
     if validate and 'dir-source' != entries.keys()[0]:
       raise ValueError("Authority entries are expected to start with a 'dir-source' line:\n%s" % (content))
@@ -1168,7 +1174,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor):
     return self._compare(other, lambda s, o: s <= o)
 
 
-class KeyCertificate(stem.descriptor.Descriptor):
+class KeyCertificate(Descriptor):
   """
   Directory key certificate for a v3 network status document.
 
@@ -1216,7 +1222,7 @@ class KeyCertificate(stem.descriptor.Descriptor):
     :raises: **ValueError** if a validity check fails
     """
 
-    entries = stem.descriptor._get_descriptor_components(content, validate)
+    entries = _get_descriptor_components(content, validate)
 
     if validate:
       if 'dir-key-certificate-version' != entries.keys()[0]:
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index 076baab..80b6623 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -23,10 +23,16 @@ import base64
 import binascii
 import datetime
 
-import stem.descriptor
 import stem.exit_policy
 import stem.util.str_tools
 
+from stem.descriptor import (
+  KEYWORD_LINE,
+  Descriptor,
+  _get_descriptor_components,
+  _read_until_keywords,
+)
+
 
 def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
   """
@@ -64,7 +70,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start
   # check if we're starting at the end of the section (ie, there's no entries to read)
   if section_end_keywords:
     first_keyword = None
-    line_match = stem.descriptor.KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
+    line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
 
     if line_match:
       first_keyword = line_match.groups()[0]
@@ -75,7 +81,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start
       return
 
   while end_position is None or document_file.tell() < end_position:
-    desc_lines, ending_keyword = stem.descriptor._read_until_keywords(
+    desc_lines, ending_keyword = _read_until_keywords(
       (entry_keyword,) + section_end_keywords,
       document_file,
       ignore_first = True,
@@ -95,7 +101,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start
       break
 
 
-class RouterStatusEntry(stem.descriptor.Descriptor):
+class RouterStatusEntry(Descriptor):
   """
   Information about an individual router stored within a network status
   document. This is the common parent for concrete status entry types.
@@ -147,7 +153,7 @@ class RouterStatusEntry(stem.descriptor.Descriptor):
 
     self._unrecognized_lines = []
 
-    entries = stem.descriptor._get_descriptor_components(content, validate)
+    entries = _get_descriptor_components(content, validate)
 
     if validate:
       self._check_constraints(entries)
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index d23f41e..39d4645 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -34,7 +34,6 @@ import datetime
 import hashlib
 import re
 
-import stem.descriptor
 import stem.descriptor.extrainfo_descriptor
 import stem.exit_policy
 import stem.prereq
@@ -45,6 +44,14 @@ import stem.version
 
 from stem.util import log
 
+from stem.descriptor import (
+  PGP_BLOCK_END,
+  Descriptor,
+  _get_bytes_field,
+  _get_descriptor_components,
+  _read_until_keywords,
+)
+
 # relay descriptors must have exactly one of the following
 REQUIRED_FIELDS = (
   "router",
@@ -118,12 +125,12 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
   # to the caller).
 
   while True:
-    annotations = stem.descriptor._read_until_keywords("router", descriptor_file)
-    descriptor_content = stem.descriptor._read_until_keywords("router-signature", descriptor_file)
+    annotations = _read_until_keywords("router", descriptor_file)
+    descriptor_content = _read_until_keywords("router-signature", descriptor_file)
 
     # we've reached the 'router-signature', now include the pgp style block
-    block_end_prefix = stem.descriptor.PGP_BLOCK_END.split(' ', 1)[0]
-    descriptor_content += stem.descriptor._read_until_keywords(block_end_prefix, descriptor_file, True)
+    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+    descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
 
     if descriptor_content:
       # strip newlines from annotations
@@ -142,7 +149,7 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
       break  # done parsing descriptors
 
 
-class ServerDescriptor(stem.descriptor.Descriptor):
+class ServerDescriptor(Descriptor):
   """
   Common parent for server descriptors.
 
@@ -216,8 +223,8 @@ class ServerDescriptor(stem.descriptor.Descriptor):
     # Only a few things can be arbitrary bytes according to the dir-spec, so
     # parsing them separately.
 
-    self.platform = stem.descriptor._get_bytes_field("platform", raw_contents)
-    self.contact = stem.descriptor._get_bytes_field("contact", raw_contents)
+    self.platform = _get_bytes_field("platform", raw_contents)
+    self.contact = _get_bytes_field("contact", raw_contents)
 
     raw_contents = stem.util.str_tools._to_unicode(raw_contents)
 
@@ -272,8 +279,7 @@ class ServerDescriptor(stem.descriptor.Descriptor):
     # influences the resulting exit policy, but for everything else the order
     # does not matter so breaking it into key / value pairs.
 
-    entries, policy = \
-      stem.descriptor._get_descriptor_components(raw_contents, validate, ("accept", "reject"))
+    entries, policy = _get_descriptor_components(raw_contents, validate, ("accept", "reject"))
 
     self.exit_policy = stem.exit_policy.ExitPolicy(*policy)
     self._parse(entries, validate)
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index 7c45118..b3c549d 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -11,10 +11,6 @@ import stem.descriptor.router_status_entry
 import stem.descriptor.server_descriptor
 import test.runner
 
-# Required to prevent unmarshal error when running this test alone.
-
-import stem.descriptor.networkstatus
-
 
 class TestDescriptorDownloader(unittest.TestCase):
   def test_using_authorities(self):
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index fb2e3f0..3ea303b 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -10,13 +10,6 @@ import stem.descriptor.remote
 
 from mock import patch
 
-# The following isn't used by this directly, but we're still importing it due
-# to a screwy aspect of how mock works. If patched() results in an import that
-# we haven't done before then we can fail with a RuntimeError. In practice this
-# just arises if we run this unit test on its own.
-
-import stem.descriptor.networkstatus
-
 # Output from requesting moria1's descriptor from itself...
 # % curl http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31
 