[tor-commits] [stem/master] Fixing server descriptor test expecting unicode

atagar at torproject.org atagar at torproject.org
Sat Feb 2 18:20:50 UTC 2013


commit bec2e9721e228f73c8b1438f1d398182893df27f
Author: Damian Johnson <atagar at torproject.org>
Date:   Thu Jan 31 08:22:38 2013 -0800

    Fixing server descriptor test expecting unicode
    
    One of the server descriptor integ tests had a failing assertion because the
    expected text was ASCII bytes and the descriptor content was unicode. Fixing
    the test and moving the to_unicode helper to str_tools where it belongs.
    
    ======================================================================
    FAIL: test_non_ascii_descriptor
    ----------------------------------------------------------------------
    Traceback:
      File "/home/atagar/Desktop/stem/test/data/python3/test/integ/descriptor/server_descriptor.py", line 221, in test_non_ascii_descriptor
        self.assertEquals(expected_contact, desc.contact)
    AssertionError: '2048R/F171EC1F Johan BlÃ¥bäck ã\x81\x93ã\x82\x93ã\x81«ã\x81¡ã\x81¯' != '2048R/F171EC1F Johan Blåbäck こんにちは'
    - 2048R/F171EC1F Johan Blåbäck こんにちは
    + 2048R/F171EC1F Johan Blåbäck こんにちは
---
 stem/descriptor/__init__.py                |   21 ++++-----------------
 stem/util/str_tools.py                     |   26 ++++++++++++++++++++++++++
 test/integ/descriptor/server_descriptor.py |    5 +++--
 test/settings.cfg                          |    5 +++--
 4 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 69d7db7..bebcdcc 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -28,6 +28,7 @@ import os
 import re
 
 import stem.prereq
+import stem.util.str_tools
 
 try:
   # added in python 2.7
@@ -274,10 +275,10 @@ class _UnicodeReader(object):
     return self.wrapped_file.next()
 
   def read(self, n = -1):
-    return self._to_unicode(self.wrapped_file.read(n))
+    return stem.util.str_tools.to_unicode(self.wrapped_file.read(n))
 
   def readline(self):
-    return self._to_unicode(self.wrapped_file.readline())
+    return stem.util.str_tools.to_unicode(self.wrapped_file.readline())
 
   def readlines(self, sizehint = 0):
     # being careful to do in-place conversion so we don't accidently double our
@@ -286,7 +287,7 @@ class _UnicodeReader(object):
     results = self.wrapped_file.readlines(sizehint)
 
     for i in xrange(len(results)):
-      results[i] = self._to_unicode(results[i])
+      results[i] = stem.util.str_tools.to_unicode(results[i])
 
     return results
 
@@ -296,20 +297,6 @@ class _UnicodeReader(object):
   def tell(self):
     return self.wrapped_file.tell()
 
-  def _to_unicode(self, msg):
-    if msg is None:
-      return msg
-
-    if stem.prereq.is_python_3():
-      is_unicode = isinstance(msg, str)
-    else:
-      is_unicode = isinstance(msg, unicode)
-
-    if is_unicode:
-      return msg
-    else:
-      return msg.decode("utf-8", "replace")
-
 
 def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False):
   """
diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py
index 2956419..37756ed 100644
--- a/stem/util/str_tools.py
+++ b/stem/util/str_tools.py
@@ -6,6 +6,7 @@ Toolkit for various string activity.
 ::
 
   to_bytes - normalizes string ASCII bytes
+  to_unicode - normalizes string to unicode
   to_camel_case - converts a string to camel case
   get_size_label - human readable label for a number of bytes
   get_time_label - human readable label for a number of seconds
@@ -55,10 +56,21 @@ if stem.prereq.is_python_3():
       return codecs.latin_1_encode(msg)[0]
     else:
       return msg
+
+  def _to_unicode(msg):
+    if msg is not None and not isinstance(msg, str):
+      return msg.decode("utf-8", "replace")
+    else:
+      return msg
 else:
   def _to_bytes(msg):
     return msg
 
+  def _to_unicode(msg):
+    if msg is not None and not isinstance(msg, unicode):
+      return msg.decode("utf-8", "replace")
+    else:
+      return msg
 
 def to_bytes(msg):
   """
@@ -76,6 +88,20 @@ def to_bytes(msg):
   return _to_bytes(msg)
 
 
+def to_unicode(msg):
+  """
+  Provides the unicode string for the given ASCII bytes. This is purely to
+  provide python 3 compatibility, normalizing the unicode/ASCII change in the
+  version bump.
+
+  :param str msg: string to be converted
+
+  :returns: unicode conversion
+  """
+
+  return _to_unicode(msg)
+
+
 def to_camel_case(label, divider = "_", joiner = " "):
   """
   Converts the given string to camel case, ie:
diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py
index 8eb262a..549cea8 100644
--- a/test/integ/descriptor/server_descriptor.py
+++ b/test/integ/descriptor/server_descriptor.py
@@ -12,6 +12,7 @@ import stem.control
 import stem.descriptor
 import stem.descriptor.server_descriptor
 import stem.exit_policy
+import stem.util.str_tools
 import stem.version
 import test.runner
 
@@ -198,10 +199,10 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
 
     descriptor_file = open_desc("non-ascii_descriptor")
     descriptor_file.readline()  # strip header
-    descriptor_contents = descriptor_file.read()
+    descriptor_contents = stem.util.str_tools.to_unicode(descriptor_file.read())
     descriptor_file.close()
 
-    expected_contact = "2048R/F171EC1F Johan Bl\xc3\xa5b\xc3\xa4ck \xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf"
+    expected_contact = b"2048R/F171EC1F Johan Bl\xc3\xa5b\xc3\xa4ck \xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf".decode("utf-8", "replace")
 
     desc = stem.descriptor.server_descriptor.RelayDescriptor(descriptor_contents)
     self.assertEquals("torrelay389752132", desc.nickname)
diff --git a/test/settings.cfg b/test/settings.cfg
index e7e659b..6ad4b59 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -157,8 +157,9 @@ target.torrc RUN_PTRACE   => PORT, PTRACE
 pyflakes.ignore stem/prereq.py => 'RSA' imported but unused
 pyflakes.ignore stem/prereq.py => 'asn1' imported but unused
 pyflakes.ignore stem/prereq.py => 'long_to_bytes' imported but unused
-pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 34
-pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 53
+pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 35
+pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 54
+pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 60
 pyflakes.ignore test/mocking.py => undefined name 'builtins'
 pyflakes.ignore test/unit/response/events.py => 'from stem import *' used; unable to detect undefined names
 





More information about the tor-commits mailing list