[tor-commits] [stem/master] Utility function for parsing control messages

atagar at torproject.org atagar at torproject.org
Fri Nov 4 17:19:04 UTC 2011


commit a090373e5384026295b31daee8ef622a589a913d
Author: Damian Johnson <atagar at torproject.org>
Date:   Tue Nov 1 18:49:55 2011 -0700

    Utility function for parsing control messages
    
    Trying out a utility function to pop the first item off a control response,
    with the caller providing attributes it should have.
---
 stem/types.py |   80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 80 insertions(+), 0 deletions(-)

diff --git a/stem/types.py b/stem/types.py
index b289921..e6188ae 100644
--- a/stem/types.py
+++ b/stem/types.py
@@ -22,6 +22,10 @@ import socket
 
 from stem.util import log
 
+# Escape sequences of tor's 'esc_for_log' (common/util.c), mapped to their unescaped characters.
+CONTROL_ESCAPES = {r"\\": "\\",  r"\"": "\"",   r"\'": "'",
+                   r"\r": "\r",  r"\n": "\n",   r"\t": "\t"}
+
 class ProtocolError(Exception):
   "Malformed content from the control socket."
   pass
@@ -282,3 +286,79 @@ class Version:
 # TODO: version requirements will probably be moved to another module later
 REQ_GETINFO_CONFIG_TEXT = Version("0.2.2.7-alpha")
 
+# TODO: trying this out temporarily to see if it's generally helpful or another
+# parser function would be a better fit
+def get_entry(line, mapping = False, quoted = False, escaped = False):
+  """
+  Parses a space separated series of entries, providing back a tuple with the
+  first entry in the string and the remainder (dropping the space between).
+  
+  This is meant to be a helper function for stem to parse tor's control
+  protocol lines rather than being used directly by this library's users.
+  
+  Example:
+    get_entry('hello there random person') =>
+      (None, "hello", "there random person")
+    get_entry('version="0.1.2.3"', True, True) =>
+      ("version", "0.1.2.3", "")
+    get_entry('"this has a \" and \\ in it" foo=bar more_data', False, True, True) =>
+      (None, 'this has a " and \ in it', "foo=bar more_data")
+  
+  Arguments:
+    line (str)     - string with a space separated series of entries
+    mapping (bool) - parses the next entry as a KEY=VALUE entry, if False then
+                     the 'key' attribute of the returned tuple is None
+    quoted (bool)  - parses the next entry as a quoted value, removing the
+                     quotes
+    escaped (bool) - unescapes the CONTROL_ESCAPES escape sequences
+  
+  Returns:
+    tuple of the form (key, value, remainder)
+  
+  Raises:
+    ValueError if 'mapping' is True without a '=' or 'quoted' is True without
+      the value being quoted
+  """
+  
+  # Start by splitting apart the 'key=everything else' portion. The key
+  # shouldn't have any spaces in it.
+  
+  if mapping:
+    key_match = re.match("^(\S+)=", line)
+    
+    if key_match:
+      key = key_match.groups()[0]
+      remainder = line[key_match.end():]
+    else:
+      raise ValueError("mapping doesn't contain a '=': " + line)
+  else: key, remainder = None, line
+  
+  if quoted:
+    # Check that we have a starting quote.
+    if not remainder.startswith("\""):
+      raise ValueError("quoted value doesn't have a leading quote: " + line)
+    
+    # Finds the ending quote. If we have escapes then we need to skip any '\"'
+    # entries.
+    end_quote = remainder.find("\"", 1)
+    
+    if is_escaped:
+      while end_quote != -1 and remainder[end_quote - 1] == "/":
+        end_quote = remainder.find("\"", end_quote + 1)
+    
+    # Check that we have an ending quote.
+    if end_quote == -1:
+      raise ValueError("quoted value doesn't have an ending quote: " + line)
+    
+    value, remainder = remainder[1:end_quote], remainder[end_quote + 1:]
+  else:
+    # Non-quoted value. Just need to check if there's more data afterward.
+    if " " in remainder: value, remainder = remainder.split(" ", 1)
+    else: value, remainder = remainder, ""
+  
+  if escaped:
+    for esc_sequence, replacement in CONTROL_ESCAPES.items():
+      value = value.replace(esc_sequence, replacement)
+  
+  return (key, value, remainder)
+





More information about the tor-commits mailing list