commit 48f76996c3bfa50580568acfebd4af838c7f7d9e Author: Damian Johnson atagar@torproject.org Date: Sat Feb 11 15:24:57 2017 -0800
Cookie paths with wide characters couldn't be loaded
When using an authentication cookie with a path containing wide characters (chinese, japanese, etc) we failed to unescape and decode them, causing authentication to fail.
Troubleshooting this revealed that we actually were incorrectly escaping all responses. Tor's esc_for_log covers more than the limited map we had...
https://gitweb.torproject.org/tor.git/tree/src/common/util.c#n1241
As such using python's function for doing C-style unescaping. Fingers crossed these always match up. ;P --- docs/change_log.rst | 1 + stem/response/__init__.py | 85 ++++++++++++++++---------------------- stem/response/protocolinfo.py | 10 ++++- test/unit/response/protocolinfo.py | 17 ++++++++ 4 files changed, 63 insertions(+), 50 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst index 338150c..edd5fa8 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -46,6 +46,7 @@ The following are only available within Stem's `git repository * **Controller**
* Added the GUARD_WAIT :data:`~stem.CircStatus` (:spec:`6446210`) + * Unable to use cookie auth when path includes wide characters (chinese, japanese, etc)
* **Descriptors**
diff --git a/stem/response/__init__.py b/stem/response/__init__.py index 8d1eaf8..a84654c 100644 --- a/stem/response/__init__.py +++ b/stem/response/__init__.py @@ -30,11 +30,13 @@ Parses replies from the control socket. +- pop_mapping - removes and returns the next entry as a KEY=VALUE mapping """
+import codecs import io import re import threading
import stem.socket +import stem.util.str_tools
__all__ = [ 'add_onion', @@ -51,16 +53,6 @@ __all__ = [
KEY_ARG = re.compile('^(\S+)=')
-# Escape sequences from the 'esc_for_log' function of tor's 'common/util.c'. -# It's hard to tell what controller functions use this in practice, but direct -# users are... -# - 'COOKIEFILE' field of PROTOCOLINFO responses -# - logged messages about bugs -# - the 'getinfo_helper_listeners' function of control.c - -CONTROL_ESCAPES = {r'\': '\', r'"': '"', r''': ''', - r'\r': '\r', r'\n': '\n', r'\t': '\t'} -
def convert(response_type, message, **kwargs): """ @@ -333,7 +325,7 @@ class ControlLine(str): """ Checks if our next entry is a quoted value or not.
- :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences + :param bool escaped: unescapes the string
:returns: **True** if the next entry can be parsed as a quoted value, **False** otherwise """ @@ -347,7 +339,7 @@ class ControlLine(str):
:param str key: checks that the key matches this value, skipping the check if **None** :param bool quoted: checks that the mapping is to a quoted value - :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences + :param bool escaped: unescapes the string
:returns: **True** if the next entry can be parsed as a key=value mapping, **False** otherwise @@ -405,7 +397,7 @@ class ControlLine(str): "this has a \" and \\ in it"
:param bool quoted: parses the next entry as a quoted value, removing the quotes - :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences + :param bool escaped: unescapes the string
:returns: **str** of the next space separated entry
@@ -415,17 +407,21 @@ class ControlLine(str): """
with self._remainder_lock: - next_entry, remainder = _parse_entry(self._remainder, quoted, escaped) + next_entry, remainder = _parse_entry(self._remainder, quoted, escaped, False) self._remainder = remainder return next_entry
- def pop_mapping(self, quoted = False, escaped = False): + def pop_mapping(self, quoted = False, escaped = False, get_bytes = False): """ Parses the next space separated entry as a KEY=VALUE mapping, removing it and the space from our remaining content.
+ .. versionchanged:: 1.6.0 + Added the get_bytes argument. + :param bool quoted: parses the value as being quoted, removing the quotes - :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences + :param bool escaped: unescapes the string + :param bool get_bytes: provides **bytes** for the **value** rather than a **str**
:returns: **tuple** of the form (key, value)
@@ -447,18 +443,18 @@ class ControlLine(str): key = key_match.groups()[0] remainder = self._remainder[key_match.end():]
- next_entry, remainder = _parse_entry(remainder, quoted, escaped) + next_entry, remainder = _parse_entry(remainder, quoted, escaped, get_bytes) self._remainder = remainder return (key, next_entry)
-def _parse_entry(line, quoted, escaped): +def _parse_entry(line, quoted, escaped, get_bytes): """ Parses the next entry from the given space separated content.
:param str line: content to be parsed :param bool quoted: parses the next entry as a quoted value, removing the quotes - :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences + :param bool escaped: unescapes the string
:returns: **tuple** of the form (entry, remainder)
@@ -488,7 +484,26 @@ def _parse_entry(line, quoted, escaped): next_entry, remainder = remainder, ''
if escaped: - next_entry = _unescape(next_entry) + # Tor does escaping in its 'esc_for_log' function of 'common/util.c'. It's + # hard to tell what controller functions use this in practice, but direct + # users are... + # + # * 'COOKIEFILE' field of PROTOCOLINFO responses + # * logged messages about bugs + # * the 'getinfo_helper_listeners' function of control.c + # + # Ideally we'd use "next_entry.decode('string_escape')" but it was removed + # in python 3.x and 'unicode_escape' isn't quite the same... + # + # https://stackoverflow.com/questions/14820429/how-do-i-decodestring-escape-in... + + next_entry = codecs.escape_decode(next_entry)[0] + + if stem.prereq.is_python_3() and not get_bytes: + next_entry = stem.util.str_tools._to_unicode(next_entry) # normalize back to str + + if get_bytes: + next_entry = stem.util.str_tools._to_bytes(next_entry)
return (next_entry, remainder.lstrip())
@@ -498,7 +513,7 @@ def _get_quote_indices(line, escaped): Provides the indices of the next two quotes in the given content.
:param str line: content to be parsed - :param bool escaped: unescapes the CONTROL_ESCAPES escape sequences + :param bool escaped: unescapes the string
:returns: **tuple** of two ints, indices being -1 if a quote doesn't exist """ @@ -519,34 +534,6 @@ def _get_quote_indices(line, escaped): return tuple(indices)
-def _unescape(entry): - # Unescapes the given string with the mappings in CONTROL_ESCAPES. - # - # This can't be a simple series of str.replace() calls because replacements - # need to be excluded from consideration for further unescaping. For - # instance, '\t' should be converted to '\t' rather than a tab. - - def _pop_with_unescape(entry): - # Pop either the first character or the escape sequence conversion the - # entry starts with. This provides a tuple of... - # - # (unescaped prefix, remaining entry) - - for esc_sequence, replacement in CONTROL_ESCAPES.items(): - if entry.startswith(esc_sequence): - return (replacement, entry[len(esc_sequence):]) - - return (entry[0], entry[1:]) - - result = [] - - while entry: - prefix, entry = _pop_with_unescape(entry) - result.append(prefix) - - return ''.join(result) - - class SingleLineResponse(ControlMessage): """ Reply to a request that performs an action rather than querying data. These diff --git a/stem/response/protocolinfo.py b/stem/response/protocolinfo.py index 6243b4d..a4320b8 100644 --- a/stem/response/protocolinfo.py +++ b/stem/response/protocolinfo.py @@ -1,9 +1,13 @@ # Copyright 2012-2017, Damian Johnson and The Tor Project # See LICENSE for licensing information
+import sys + +import stem.prereq import stem.response import stem.socket import stem.version +import stem.util.str_tools
from stem.connection import AuthMethod from stem.util import log @@ -101,8 +105,12 @@ class ProtocolInfoResponse(stem.response.ControlMessage): auth_methods.append(AuthMethod.UNKNOWN)
# parse optional COOKIEFILE mapping (quoted and can have escapes) + if line.is_next_mapping('COOKIEFILE', True, True): - self.cookie_path = line.pop_mapping(True, True)[1] + self.cookie_path = line.pop_mapping(True, True, get_bytes = True)[1].decode(sys.getfilesystemencoding()) + + if stem.prereq.is_python_3(): + self.cookie_path = stem.util.str_tools._to_unicode(self.cookie_path) # normalize back to str elif line_type == 'VERSION': # Line format: # VersionLine = "250-VERSION" SP "Tor=" TorVersion OptArguments CRLF diff --git a/test/unit/response/protocolinfo.py b/test/unit/response/protocolinfo.py index 4dcf291..c95076a 100644 --- a/test/unit/response/protocolinfo.py +++ b/test/unit/response/protocolinfo.py @@ -49,11 +49,18 @@ UNKNOWN_AUTH = """250-PROTOCOLINFO 1 MINIMUM_RESPONSE = """250-PROTOCOLINFO 5 250 OK"""
+UNICODE_COOKIE_PATH = r"""250-PROTOCOLINFO 1 +250-AUTH METHODS=COOKIE COOKIEFILE="/home/user/\346\226\207\346\241\243/tor-browser_en-US/Browser/TorBrowser/Data/Tor/control_auth_cookie" +250-VERSION Tor="0.2.1.30" +250 OK""" + RELATIVE_COOKIE_PATH = r"""250-PROTOCOLINFO 1 250-AUTH METHODS=COOKIE COOKIEFILE="./tor-browser_en-US/Data/control_auth_cookie" 250-VERSION Tor="0.2.1.30" 250 OK"""
+EXPECTED_UNICODE_PATH = b"/home/user/\346\226\207\346\241\243/tor-browser_en-US/Browser/TorBrowser/Data/Tor/control_auth_cookie".decode('utf-8') +
class TestProtocolInfoResponse(unittest.TestCase): def test_convert(self): @@ -151,6 +158,16 @@ class TestProtocolInfoResponse(unittest.TestCase): self.assertEqual((), control_message.unknown_auth_methods) self.assertEqual(None, control_message.cookie_path)
+ @patch('sys.getfilesystemencoding', Mock(return_value = 'UTF-8')) + def test_unicode_cookie(self): + """ + Checks an authentication cookie with a unicode path. + """ + + control_message = mocking.get_message(UNICODE_COOKIE_PATH) + stem.response.convert('PROTOCOLINFO', control_message) + self.assertEqual(EXPECTED_UNICODE_PATH, control_message.cookie_path) + @patch('stem.util.proc.is_available', Mock(return_value = False)) @patch('stem.util.system.is_available', Mock(return_value = True)) def test_relative_cookie(self):
tor-commits@lists.torproject.org