[tor-commits] [stem/master] Providing a string when str() is called on descriptors

atagar at torproject.org atagar at torproject.org
Sun Feb 17 22:02:36 UTC 2013


commit 1a099106c43e37bf41f5c1f803f27cd2aad270f5
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Feb 17 13:57:15 2013 -0800

    Providing a string when str() is called on descriptors
    
    Python 2.x raises a UnicodeEncodeError when an object's __str__ method
    returns a unicode string containing non-ASCII characters. For example,
    calling...
    
    >>> str(desc)
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    UnicodeEncodeError: 'ascii' codec can't encode character u'\xab' in position 28: ordinal not in range(128)
    
    We now provide a byte str (latin-1 encoded, with unencodable characters
    replaced) in python 2.x and a unicode str in python 3.x. Thanks to
    Sathyanarayanan for the catch!
---
 stem/descriptor/__init__.py                |    5 ++++-
 stem/util/str_tools.py                     |   14 ++++++++------
 test/integ/descriptor/server_descriptor.py |    7 +++++++
 test/settings.cfg                          |    4 ++--
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index b3cc8c9..25b180b 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -297,7 +297,10 @@ class Descriptor(object):
     self._archive_path = path
 
   def __str__(self):
-    return self._raw_contents
+    if stem.prereq.is_python_3():
+      return self._raw_contents
+    else:
+      return str(stem.util.str_tools.to_bytes(self._raw_contents))
 
 
 class _UnicodeReader(object):
diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py
index ef69da7..14198b4 100644
--- a/stem/util/str_tools.py
+++ b/stem/util/str_tools.py
@@ -20,6 +20,7 @@ Toolkit for various string activity.
   parse_iso_timestamp - parses an ISO timestamp as a datetime value
 """
 
+import codecs
 import datetime
 
 import stem.prereq
@@ -52,11 +53,9 @@ TIME_UNITS = (
 )
 
 if stem.prereq.is_python_3():
-  import codecs
-
   def _to_bytes(msg):
     if isinstance(msg, str):
-      return codecs.latin_1_encode(msg)[0]
+      return codecs.latin_1_encode(msg, "replace")[0]
     else:
       return msg
 
@@ -67,7 +66,10 @@ if stem.prereq.is_python_3():
       return msg
 else:
   def _to_bytes(msg):
-    return msg
+    if msg is not None and isinstance(msg, unicode):
+      return codecs.latin_1_encode(msg, "replace")[0]
+    else:
+      return msg
 
   def _to_unicode(msg):
     if msg is not None and not isinstance(msg, unicode):
@@ -84,7 +86,7 @@ def to_bytes(msg):
 
   http://python3porting.com/problems.html#nicer-solutions
 
-  :param msg label: string to be converted
+  :param str,unicode msg: string to be converted
 
   :returns: ASCII bytes for string
   """
@@ -98,7 +100,7 @@ def to_unicode(msg):
   provide python 3 compatability, normalizing the unicode/ASCII change in the
   version bump.
 
-  :param msg label: string to be converted
+  :param str,unicode msg: string to be converted
 
   :returns: unicode conversion
   """
diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py
index c1cd742..fb2e2b3 100644
--- a/test/integ/descriptor/server_descriptor.py
+++ b/test/integ/descriptor/server_descriptor.py
@@ -221,6 +221,13 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
     self.assertEquals(stem.exit_policy.ExitPolicy("reject *:*"), desc.exit_policy)
     self.assertEquals([], desc.get_unrecognized_lines())
 
+    # Make sure that we can get a string representation for this descriptor
+    # (having unicode content risks a UnicodeEncodeError)...
+    #
+    # https://trac.torproject.org/8265
+
+    self.assertTrue(isinstance(str(desc), str))
+
   def test_cr_in_contact_line(self):
     """
     Parses a descriptor with a huge contact line containing anomalous carriage
diff --git a/test/settings.cfg b/test/settings.cfg
index 9b606c5..558b43b 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -158,8 +158,8 @@ pyflakes.ignore stem/prereq.py => 'RSA' imported but unused
 pyflakes.ignore stem/prereq.py => 'asn1' imported but unused
 pyflakes.ignore stem/prereq.py => 'long_to_bytes' imported but unused
 pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 59
-pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 57
-pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 63
+pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 56
+pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 62
 pyflakes.ignore test/mocking.py => undefined name 'builtins'
 pyflakes.ignore test/unit/response/events.py => 'from stem import *' used; unable to detect undefined names
 



More information about the tor-commits mailing list