[tor-commits] [stem/master] Cookie paths with wide characters couldn't be loaded

atagar at torproject.org atagar at torproject.org
Sun Feb 12 05:52:41 UTC 2017


commit 48f76996c3bfa50580568acfebd4af838c7f7d9e
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Feb 11 15:24:57 2017 -0800

    Cookie paths with wide characters couldn't be loaded
    
    When using an authentication cookie with a path containing wide characters
    (chinese, japanese, etc) we failed to unescape and decode them, causing
    authentication to fail.
    
    Troubleshooting this revealed that we actually were incorrectly escaping all
    responses. Tor's esc_for_log covers more than the limited map we had...
    
      https://gitweb.torproject.org/tor.git/tree/src/common/util.c#n1241
    
    As such using python's function for doing C-style unescaping. Fingers crossed
    these always match up. ;P
---
 docs/change_log.rst                |  1 +
 stem/response/__init__.py          | 85 ++++++++++++++++----------------------
 stem/response/protocolinfo.py      | 10 ++++-
 test/unit/response/protocolinfo.py | 17 ++++++++
 4 files changed, 63 insertions(+), 50 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index 338150c..edd5fa8 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -46,6 +46,7 @@ The following are only available within Stem's `git repository
  * **Controller**
 
   * Added the GUARD_WAIT :data:`~stem.CircStatus` (:spec:`6446210`)
+  * Unable to use cookie auth when path includes wide characters (chinese, japanese, etc)
 
  * **Descriptors**
 
diff --git a/stem/response/__init__.py b/stem/response/__init__.py
index 8d1eaf8..a84654c 100644
--- a/stem/response/__init__.py
+++ b/stem/response/__init__.py
@@ -30,11 +30,13 @@ Parses replies from the control socket.
     +- pop_mapping - removes and returns the next entry as a KEY=VALUE mapping
 """
 
+import codecs
 import io
 import re
 import threading
 
 import stem.socket
+import stem.util.str_tools
 
 __all__ = [
   'add_onion',
@@ -51,16 +53,6 @@ __all__ = [
 
 KEY_ARG = re.compile('^(\S+)=')
 
-# Escape sequences from the 'esc_for_log' function of tor's 'common/util.c'.
-# It's hard to tell what controller functions use this in practice, but direct
-# users are...
-# - 'COOKIEFILE' field of PROTOCOLINFO responses
-# - logged messages about bugs
-# - the 'getinfo_helper_listeners' function of control.c
-
-CONTROL_ESCAPES = {r'\\': '\\', r'\"': '\"', r'\'': '\'',
-                   r'\r': '\r', r'\n': '\n', r'\t': '\t'}
-
 
 def convert(response_type, message, **kwargs):
   """
@@ -333,7 +325,7 @@ class ControlLine(str):
     """
     Checks if our next entry is a quoted value or not.
 
-    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences
+    :param bool escaped: unescapes the string
 
     :returns: **True** if the next entry can be parsed as a quoted value, **False** otherwise
     """
@@ -347,7 +339,7 @@ class ControlLine(str):
 
     :param str key: checks that the key matches this value, skipping the check if **None**
     :param bool quoted: checks that the mapping is to a quoted value
-    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences
+    :param bool escaped: unescapes the string
 
     :returns: **True** if the next entry can be parsed as a key=value mapping,
       **False** otherwise
@@ -405,7 +397,7 @@ class ControlLine(str):
         "this has a \\" and \\\\ in it"
 
     :param bool quoted: parses the next entry as a quoted value, removing the quotes
-    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences
+    :param bool escaped: unescapes the string
 
     :returns: **str** of the next space separated entry
 
@@ -415,17 +407,21 @@ class ControlLine(str):
     """
 
     with self._remainder_lock:
-      next_entry, remainder = _parse_entry(self._remainder, quoted, escaped)
+      next_entry, remainder = _parse_entry(self._remainder, quoted, escaped, False)
       self._remainder = remainder
       return next_entry
 
-  def pop_mapping(self, quoted = False, escaped = False):
+  def pop_mapping(self, quoted = False, escaped = False, get_bytes = False):
     """
     Parses the next space separated entry as a KEY=VALUE mapping, removing it
     and the space from our remaining content.
 
+    .. versionchanged:: 1.6.0
+       Added the get_bytes argument.
+
     :param bool quoted: parses the value as being quoted, removing the quotes
-    :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences
+    :param bool escaped: unescapes the string
+    :param bool get_bytes: provides **bytes** for the **value** rather than a **str**
 
     :returns: **tuple** of the form (key, value)
 
@@ -447,18 +443,18 @@ class ControlLine(str):
       key = key_match.groups()[0]
       remainder = self._remainder[key_match.end():]
 
-      next_entry, remainder = _parse_entry(remainder, quoted, escaped)
+      next_entry, remainder = _parse_entry(remainder, quoted, escaped, get_bytes)
       self._remainder = remainder
       return (key, next_entry)
 
 
-def _parse_entry(line, quoted, escaped):
+def _parse_entry(line, quoted, escaped, get_bytes):
   """
   Parses the next entry from the given space separated content.
 
   :param str line: content to be parsed
   :param bool quoted: parses the next entry as a quoted value, removing the quotes
-  :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences
+  :param bool escaped: unescapes the string
 
   :returns: **tuple** of the form (entry, remainder)
 
@@ -488,7 +484,26 @@ def _parse_entry(line, quoted, escaped):
       next_entry, remainder = remainder, ''
 
   if escaped:
-    next_entry = _unescape(next_entry)
+    # Tor does escaping in its 'esc_for_log' function of 'common/util.c'. It's
+    # hard to tell what controller functions use this in practice, but direct
+    # users are...
+    #
+    #   * 'COOKIEFILE' field of PROTOCOLINFO responses
+    #   * logged messages about bugs
+    #   * the 'getinfo_helper_listeners' function of control.c
+    #
+    # Ideally we'd use "next_entry.decode('string_escape')" but it was removed
+    # in python 3.x and 'unicode_escape' isn't quite the same...
+    #
+    #   https://stackoverflow.com/questions/14820429/how-do-i-decodestring-escape-in-python3
+
+    next_entry = codecs.escape_decode(next_entry)[0]
+
+    if stem.prereq.is_python_3() and not get_bytes:
+      next_entry = stem.util.str_tools._to_unicode(next_entry)  # normalize back to str
+
+  if get_bytes:
+    next_entry = stem.util.str_tools._to_bytes(next_entry)
 
   return (next_entry, remainder.lstrip())
 
@@ -498,7 +513,7 @@ def _get_quote_indices(line, escaped):
   Provides the indices of the next two quotes in the given content.
 
   :param str line: content to be parsed
-  :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences
+  :param bool escaped: unescapes the string
 
   :returns: **tuple** of two ints, indices being -1 if a quote doesn't exist
   """
@@ -519,34 +534,6 @@ def _get_quote_indices(line, escaped):
   return tuple(indices)
 
 
-def _unescape(entry):
-  # Unescapes the given string with the mappings in CONTROL_ESCAPES.
-  #
-  # This can't be a simple series of str.replace() calls because replacements
-  # need to be excluded from consideration for further unescaping. For
-  # instance, '\\t' should be converted to '\t' rather than a tab.
-
-  def _pop_with_unescape(entry):
-    # Pop either the first character or the escape sequence conversion the
-    # entry starts with. This provides a tuple of...
-    #
-    #   (unescaped prefix, remaining entry)
-
-    for esc_sequence, replacement in CONTROL_ESCAPES.items():
-      if entry.startswith(esc_sequence):
-        return (replacement, entry[len(esc_sequence):])
-
-    return (entry[0], entry[1:])
-
-  result = []
-
-  while entry:
-    prefix, entry = _pop_with_unescape(entry)
-    result.append(prefix)
-
-  return ''.join(result)
-
-
 class SingleLineResponse(ControlMessage):
   """
   Reply to a request that performs an action rather than querying data. These
diff --git a/stem/response/protocolinfo.py b/stem/response/protocolinfo.py
index 6243b4d..a4320b8 100644
--- a/stem/response/protocolinfo.py
+++ b/stem/response/protocolinfo.py
@@ -1,9 +1,13 @@
 # Copyright 2012-2017, Damian Johnson and The Tor Project
 # See LICENSE for licensing information
 
+import sys
+
+import stem.prereq
 import stem.response
 import stem.socket
 import stem.version
+import stem.util.str_tools
 
 from stem.connection import AuthMethod
 from stem.util import log
@@ -101,8 +105,12 @@ class ProtocolInfoResponse(stem.response.ControlMessage):
               auth_methods.append(AuthMethod.UNKNOWN)
 
         # parse optional COOKIEFILE mapping (quoted and can have escapes)
+
         if line.is_next_mapping('COOKIEFILE', True, True):
-          self.cookie_path = line.pop_mapping(True, True)[1]
+          self.cookie_path = line.pop_mapping(True, True, get_bytes = True)[1].decode(sys.getfilesystemencoding())
+
+          if stem.prereq.is_python_3():
+            self.cookie_path = stem.util.str_tools._to_unicode(self.cookie_path)  # normalize back to str
       elif line_type == 'VERSION':
         # Line format:
         #   VersionLine = "250-VERSION" SP "Tor=" TorVersion OptArguments CRLF
diff --git a/test/unit/response/protocolinfo.py b/test/unit/response/protocolinfo.py
index 4dcf291..c95076a 100644
--- a/test/unit/response/protocolinfo.py
+++ b/test/unit/response/protocolinfo.py
@@ -49,11 +49,18 @@ UNKNOWN_AUTH = """250-PROTOCOLINFO 1
 MINIMUM_RESPONSE = """250-PROTOCOLINFO 5
 250 OK"""
 
+UNICODE_COOKIE_PATH = r"""250-PROTOCOLINFO 1
+250-AUTH METHODS=COOKIE COOKIEFILE="/home/user/\346\226\207\346\241\243/tor-browser_en-US/Browser/TorBrowser/Data/Tor/control_auth_cookie"
+250-VERSION Tor="0.2.1.30"
+250 OK"""
+
 RELATIVE_COOKIE_PATH = r"""250-PROTOCOLINFO 1
 250-AUTH METHODS=COOKIE COOKIEFILE="./tor-browser_en-US/Data/control_auth_cookie"
 250-VERSION Tor="0.2.1.30"
 250 OK"""
 
+EXPECTED_UNICODE_PATH = b"/home/user/\346\226\207\346\241\243/tor-browser_en-US/Browser/TorBrowser/Data/Tor/control_auth_cookie".decode('utf-8')
+
 
 class TestProtocolInfoResponse(unittest.TestCase):
   def test_convert(self):
@@ -151,6 +158,16 @@ class TestProtocolInfoResponse(unittest.TestCase):
     self.assertEqual((), control_message.unknown_auth_methods)
     self.assertEqual(None, control_message.cookie_path)
 
+  @patch('sys.getfilesystemencoding', Mock(return_value = 'UTF-8'))
+  def test_unicode_cookie(self):
+    """
+    Checks an authentication cookie with a unicode path.
+    """
+
+    control_message = mocking.get_message(UNICODE_COOKIE_PATH)
+    stem.response.convert('PROTOCOLINFO', control_message)
+    self.assertEqual(EXPECTED_UNICODE_PATH, control_message.cookie_path)
+
   @patch('stem.util.proc.is_available', Mock(return_value = False))
   @patch('stem.util.system.is_available', Mock(return_value = True))
   def test_relative_cookie(self):



More information about the tor-commits mailing list