[tor-commits] [stem/master] Providing a string when str() is called on descriptors
atagar at torproject.org
atagar at torproject.org
Sun Feb 17 22:02:36 UTC 2013
commit 1a099106c43e37bf41f5c1f803f27cd2aad270f5
Author: Damian Johnson <atagar at torproject.org>
Date: Sun Feb 17 13:57:15 2013 -0800
Providing a string when str() is called on descriptors
Python 2.x gets pretty confused when an object's __str__ method returns a
unicode string containing non-ASCII characters. Calling str() on such a descriptor fails with...
>>> str(desc)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
UnicodeEncodeError: 'ascii' codec can't encode character u'\xab' in position 28: ordinal not in range(128)
To fix this, __str__ now provides a byte str (latin-1 encoded, with unencodable
characters replaced) in python 2.x and a unicode str in python 3.x. Thanks to
Sathyanarayanan for the catch!
---
stem/descriptor/__init__.py | 5 ++++-
stem/util/str_tools.py | 14 ++++++++------
test/integ/descriptor/server_descriptor.py | 7 +++++++
test/settings.cfg | 4 ++--
4 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index b3cc8c9..25b180b 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -297,7 +297,10 @@ class Descriptor(object):
self._archive_path = path
def __str__(self):
- return self._raw_contents
+ if stem.prereq.is_python_3():
+ return self._raw_contents
+ else:
+ return str(stem.util.str_tools.to_bytes(self._raw_contents))
class _UnicodeReader(object):
diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py
index ef69da7..14198b4 100644
--- a/stem/util/str_tools.py
+++ b/stem/util/str_tools.py
@@ -20,6 +20,7 @@ Toolkit for various string activity.
parse_iso_timestamp - parses an ISO timestamp as a datetime value
"""
+import codecs
import datetime
import stem.prereq
@@ -52,11 +53,9 @@ TIME_UNITS = (
)
if stem.prereq.is_python_3():
- import codecs
-
def _to_bytes(msg):
if isinstance(msg, str):
- return codecs.latin_1_encode(msg)[0]
+ return codecs.latin_1_encode(msg, "replace")[0]
else:
return msg
@@ -67,7 +66,10 @@ if stem.prereq.is_python_3():
return msg
else:
def _to_bytes(msg):
- return msg
+ if msg is not None and isinstance(msg, unicode):
+ return codecs.latin_1_encode(msg, "replace")[0]
+ else:
+ return msg
def _to_unicode(msg):
if msg is not None and not isinstance(msg, unicode):
@@ -84,7 +86,7 @@ def to_bytes(msg):
http://python3porting.com/problems.html#nicer-solutions
- :param msg label: string to be converted
+ :param str,unicode msg: string to be converted
:returns: ASCII bytes for string
"""
@@ -98,7 +100,7 @@ def to_unicode(msg):
provide python 3 compatability, normalizing the unicode/ASCII change in the
version bump.
- :param msg label: string to be converted
+ :param str,unicode msg: string to be converted
:returns: unicode conversion
"""
diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py
index c1cd742..fb2e2b3 100644
--- a/test/integ/descriptor/server_descriptor.py
+++ b/test/integ/descriptor/server_descriptor.py
@@ -221,6 +221,13 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
self.assertEquals(stem.exit_policy.ExitPolicy("reject *:*"), desc.exit_policy)
self.assertEquals([], desc.get_unrecognized_lines())
+ # Make sure that we can get a string representation for this descriptor
+ # (having unicode content risks a UnicodeEncodeError)...
+ #
+ # https://trac.torproject.org/8265
+
+ self.assertTrue(isinstance(str(desc), str))
+
def test_cr_in_contact_line(self):
"""
Parses a descriptor with a huge contact line containing anomalous carriage
diff --git a/test/settings.cfg b/test/settings.cfg
index 9b606c5..558b43b 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -158,8 +158,8 @@ pyflakes.ignore stem/prereq.py => 'RSA' imported but unused
pyflakes.ignore stem/prereq.py => 'asn1' imported but unused
pyflakes.ignore stem/prereq.py => 'long_to_bytes' imported but unused
pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 59
-pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 57
-pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 63
+pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 56
+pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 62
pyflakes.ignore test/mocking.py => undefined name 'builtins'
pyflakes.ignore test/unit/response/events.py => 'from stem import *' used; unable to detect undefined names
More information about the tor-commits
mailing list