
commit bec2e9721e228f73c8b1438f1d398182893df27f Author: Damian Johnson <atagar@torproject.org> Date: Thu Jan 31 08:22:38 2013 -0800 Fixing server descriptor test expecting unicode One of the server descriptor integ tests had a failing assertion because the expected text was ASCII bytes and the descriptor content was unicode. Fixing the test and moving the to_unicode helper to str_tools where it belongs. ====================================================================== FAIL: test_non_ascii_descriptor ---------------------------------------------------------------------- Traceback: File "/home/atagar/Desktop/stem/test/data/python3/test/integ/descriptor/server_descriptor.py", line 221, in test_non_ascii_descriptor self.assertEquals(expected_contact, desc.contact) AssertionError: '2048R/F171EC1F Johan BlÃ¥bäck ã\x81\x93ã\x82\x93ã\x81«ã\x81¡ã\x81¯' != '2048R/F171EC1F Johan Blåbäck こんにちは' - 2048R/F171EC1F Johan BlÃ¥bäck ããã«ã¡ã¯ + 2048R/F171EC1F Johan Blåbäck こんにちは --- stem/descriptor/__init__.py | 21 ++++----------------- stem/util/str_tools.py | 26 ++++++++++++++++++++++++++ test/integ/descriptor/server_descriptor.py | 5 +++-- test/settings.cfg | 5 +++-- 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 69d7db7..bebcdcc 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -28,6 +28,7 @@ import os import re import stem.prereq +import stem.util.str_tools try: # added in python 2.7 @@ -274,10 +275,10 @@ class _UnicodeReader(object): return self.wrapped_file.next() def read(self, n = -1): - return self._to_unicode(self.wrapped_file.read(n)) + return stem.util.str_tools.to_unicode(self.wrapped_file.read(n)) def readline(self): - return self._to_unicode(self.wrapped_file.readline()) + return stem.util.str_tools.to_unicode(self.wrapped_file.readline()) def readlines(self, sizehint = 0): # being careful to do in-place conversion so we don't accidently double our @@ -286,7 +287,7 @@ class _UnicodeReader(object): results = self.wrapped_file.readlines(sizehint) for i in xrange(len(results)): - results[i] = self._to_unicode(results[i]) + results[i] = stem.util.str_tools.to_unicode(results[i]) return results @@ -296,20 +297,6 @@ class _UnicodeReader(object): def tell(self): return self.wrapped_file.tell() - def _to_unicode(self, msg): - if msg is None: - return msg - - if stem.prereq.is_python_3(): - is_unicode = isinstance(msg, str) - else: - is_unicode = isinstance(msg, unicode) - - if is_unicode: - return msg - else: - return msg.decode("utf-8", "replace") - def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False): """ diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py index 2956419..37756ed 100644 --- a/stem/util/str_tools.py +++ b/stem/util/str_tools.py @@ -6,6 +6,7 @@ Toolkit for various string activity. :: to_bytes - normalizes string ASCII bytes + to_unicode - normalizes string to unicode to_camel_case - converts a string to camel case get_size_label - human readable label for a number of bytes get_time_label - human readable label for a number of seconds @@ -55,10 +56,21 @@ if stem.prereq.is_python_3(): return codecs.latin_1_encode(msg)[0] else: return msg + + def _to_unicode(msg): + if msg is not None and not isinstance(msg, str): + return msg.decode("utf-8", "replace") + else: + return msg else: def _to_bytes(msg): return msg + def _to_unicode(msg): + if msg is not None and not isinstance(msg, unicode): + return msg.decode("utf-8", "replace") + else: + return msg def to_bytes(msg): """ @@ -76,6 +88,20 @@ def to_bytes(msg): return _to_bytes(msg) +def to_unicode(msg): + """ + Provides the unicode string for the given ASCII bytes. This is purely to + provide python 3 compatability, normalizing the unicode/ASCII change in the + version bump. + + :param msg label: string to be converted + + :returns: unicode conversion + """ + + return _to_unicode(msg) + + def to_camel_case(label, divider = "_", joiner = " "): """ Converts the given string to camel case, ie: diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py index 8eb262a..549cea8 100644 --- a/test/integ/descriptor/server_descriptor.py +++ b/test/integ/descriptor/server_descriptor.py @@ -12,6 +12,7 @@ import stem.control import stem.descriptor import stem.descriptor.server_descriptor import stem.exit_policy +import stem.util.str_tools import stem.version import test.runner @@ -198,10 +199,10 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4= descriptor_file = open_desc("non-ascii_descriptor") descriptor_file.readline() # strip header - descriptor_contents = descriptor_file.read() + descriptor_contents = stem.util.str_tools.to_unicode(descriptor_file.read()) descriptor_file.close() - expected_contact = "2048R/F171EC1F Johan Bl\xc3\xa5b\xc3\xa4ck \xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf" + expected_contact = b"2048R/F171EC1F Johan Bl\xc3\xa5b\xc3\xa4ck \xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf".decode("utf-8", "replace") desc = stem.descriptor.server_descriptor.RelayDescriptor(descriptor_contents) self.assertEquals("torrelay389752132", desc.nickname) diff --git a/test/settings.cfg b/test/settings.cfg index e7e659b..6ad4b59 100644 --- a/test/settings.cfg +++ b/test/settings.cfg @@ -157,8 +157,9 @@ target.torrc RUN_PTRACE => PORT, PTRACE pyflakes.ignore stem/prereq.py => 'RSA' imported but unused pyflakes.ignore stem/prereq.py => 'asn1' imported but unused pyflakes.ignore stem/prereq.py => 'long_to_bytes' imported but unused -pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 34 -pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 53 +pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 35 +pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 54 +pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 60 pyflakes.ignore test/mocking.py => undefined name 'builtins' pyflakes.ignore test/unit/response/events.py => 'from stem import *' used; unable to detect undefined names