[tor-commits] [stem/master] Unescaped characters shouldn't be considered in further unescaping

atagar at torproject.org atagar at torproject.org
Sat Mar 16 23:06:29 UTC 2013


commit b8d79e24b4599b59df1d68bd99273cebb11a8038
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Mar 16 11:30:36 2013 -0700

    Unescaped characters shouldn't be considered in further unescaping
    
    When we unescape responses we should process the string from start to end, and
    when something is unescaped it needs to be removed from further consideration.
    For instance, '\\t' should be unescaped to '\t' (a literal backslash followed
    by a 't'), not a tab. Caught on...
    
    https://trac.torproject.org/8471
---
 stem/response/__init__.py          |   31 +++++++++++++++++++++++++++++--
 test/unit/response/control_line.py |    7 +++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/stem/response/__init__.py b/stem/response/__init__.py
index 838ea89..c59c9bd 100644
--- a/stem/response/__init__.py
+++ b/stem/response/__init__.py
@@ -450,8 +450,7 @@ def _parse_entry(line, quoted, escaped):
       next_entry, remainder = remainder, ""
 
   if escaped:
-    for esc_sequence, replacement in CONTROL_ESCAPES.items():
-      next_entry = next_entry.replace(esc_sequence, replacement)
+    next_entry = _unescape(next_entry)
 
   return (next_entry, remainder.lstrip())
 
@@ -482,6 +481,34 @@ def _get_quote_indices(line, escaped):
   return tuple(indices)
 
 
+def _unescape(entry):
+  # Unescapes the given string with the mappings in CONTROL_ESCAPES.
+  #
+  # This can't be a simple series of str.replace() calls because replacements
+  # need to be excluded from consideration for further unescaping. For
+  # instance, '\\t' should be converted to '\t' rather than a tab.
+
+  def _pop_with_unescape(entry):
+    # Pop either the first character or the escape sequence conversion the
+    # entry starts with. This provides a tuple of...
+    #
+    #   (unescaped prefix, remaining entry)
+
+    for esc_sequence, replacement in CONTROL_ESCAPES.items():
+      if entry.startswith(esc_sequence):
+        return (replacement, entry[len(esc_sequence):])
+
+    return (entry[0], entry[1:])
+
+  result = []
+
+  while entry:
+    prefix, entry = _pop_with_unescape(entry)
+    result.append(prefix)
+
+  return "".join(result)
+
+
 class SingleLineResponse(ControlMessage):
   """
   Reply to a request that performs an action rather than querying data. These
diff --git a/test/unit/response/control_line.py b/test/unit/response/control_line.py
index c342dd2..c00baf6 100644
--- a/test/unit/response/control_line.py
+++ b/test/unit/response/control_line.py
@@ -165,3 +165,10 @@ class TestControlLine(unittest.TestCase):
     line = stem.response.ControlLine(cookie_file_entry)
     self.assertEquals(line.pop_mapping(True, True), ('COOKIEFILE', r'/tmp/my data\"dir//control_auth_cookie'))
     self.assertTrue(line.is_empty())
+
+    # try an escaped slash followed by a character that could be part of an
+    # escape sequence
+
+    line = stem.response.ControlLine(r'COOKIEFILE="C:\\Users\\Atagar\\AppData\\tor\\control_auth_cookie"')
+    self.assertEquals(line.pop_mapping(True, True), ('COOKIEFILE', r'C:\Users\Atagar\AppData\tor\control_auth_cookie'))
+    self.assertTrue(line.is_empty())



More information about the tor-commits mailing list