[stem/master] Providing a string when str() is called on descriptors

commit 1a099106c43e37bf41f5c1f803f27cd2aad270f5 Author: Damian Johnson <atagar@torproject.org> Date: Sun Feb 17 13:57:15 2013 -0800 Providing a string when str() is called on descriptors Python 2.x gets pretty confused when an object's __str__ method provides a unicode string. Calling... >>> str(desc) Traceback (most recent call last): File "<stdin>", line 1, in <module> UnicodeEncodeError: 'ascii' codec can't encode character u'\xab' in position 28: ordinal not in range(128) Providing an ascii str in python 2.x and unicode str in python 3.x. Thanks to Sathyanarayanan for the catch! --- stem/descriptor/__init__.py | 5 ++++- stem/util/str_tools.py | 14 ++++++++------ test/integ/descriptor/server_descriptor.py | 7 +++++++ test/settings.cfg | 4 ++-- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index b3cc8c9..25b180b 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -297,7 +297,10 @@ class Descriptor(object): self._archive_path = path def __str__(self): - return self._raw_contents + if stem.prereq.is_python_3(): + return self._raw_contents + else: + return str(stem.util.str_tools.to_bytes(self._raw_contents)) class _UnicodeReader(object): diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py index ef69da7..14198b4 100644 --- a/stem/util/str_tools.py +++ b/stem/util/str_tools.py @@ -20,6 +20,7 @@ Toolkit for various string activity. parse_iso_timestamp - parses an ISO timestamp as a datetime value """ +import codecs import datetime import stem.prereq @@ -52,11 +53,9 @@ TIME_UNITS = ( ) if stem.prereq.is_python_3(): - import codecs - def _to_bytes(msg): if isinstance(msg, str): - return codecs.latin_1_encode(msg)[0] + return codecs.latin_1_encode(msg, "replace")[0] else: return msg @@ -67,7 +66,10 @@ if stem.prereq.is_python_3(): return msg else: def _to_bytes(msg): - return msg + if msg is not None and isinstance(msg, unicode): + return codecs.latin_1_encode(msg, "replace")[0] + else: + return msg def _to_unicode(msg): if msg is not None and not isinstance(msg, unicode): @@ -84,7 +86,7 @@ def to_bytes(msg): http://python3porting.com/problems.html#nicer-solutions - :param msg label: string to be converted + :param str,unicode msg: string to be converted :returns: ASCII bytes for string """ @@ -98,7 +100,7 @@ def to_unicode(msg): provide python 3 compatability, normalizing the unicode/ASCII change in the version bump. - :param msg label: string to be converted + :param str,unicode msg: string to be converted :returns: unicode conversion """ diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py index c1cd742..fb2e2b3 100644 --- a/test/integ/descriptor/server_descriptor.py +++ b/test/integ/descriptor/server_descriptor.py @@ -221,6 +221,13 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4= self.assertEquals(stem.exit_policy.ExitPolicy("reject *:*"), desc.exit_policy) self.assertEquals([], desc.get_unrecognized_lines()) + # Make sure that we can get a string representation for this descriptor + # (having unicode content risks a UnicodeEncodeError)... + # + # https://trac.torproject.org/8265 + + self.assertTrue(isinstance(str(desc), str)) + def test_cr_in_contact_line(self): """ Parses a descriptor with a huge contact line containing anomalous carriage diff --git a/test/settings.cfg b/test/settings.cfg index 9b606c5..558b43b 100644 --- a/test/settings.cfg +++ b/test/settings.cfg @@ -158,8 +158,8 @@ pyflakes.ignore stem/prereq.py => 'RSA' imported but unused pyflakes.ignore stem/prereq.py => 'asn1' imported but unused pyflakes.ignore stem/prereq.py => 'long_to_bytes' imported but unused pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 59 -pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 57 -pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 63 +pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 56 +pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 62 pyflakes.ignore test/mocking.py => undefined name 'builtins' pyflakes.ignore test/unit/response/events.py => 'from stem import *' used; unable to detect undefined names
participants (1)
-
atagar@torproject.org