[tor-commits] [stem/master] Moving ControlMessage and parsing into types

atagar at torproject.org atagar at torproject.org
Tue Oct 11 04:26:10 UTC 2011


commit 92f78d459497dd3c0d688c040e06d6cc6668c43c
Author: Damian Johnson <atagar at torproject.org>
Date:   Mon Oct 10 19:54:30 2011 -0700

    Moving ControlMessage and parsing into types
    
    ControlMessage is a standalone class, so this moves it into the types module.
    Doing so both suits the types module and makes testing easier.
---
 stem/connection.py |  131 +----------------------------
 stem/types.py      |  237 ++++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 208 insertions(+), 160 deletions(-)

diff --git a/stem/connection.py b/stem/connection.py
index cede227..5e1fe5e 100644
--- a/stem/connection.py
+++ b/stem/connection.py
@@ -6,62 +6,9 @@ import Queue
 import socket
 import threading
 
-from stem.util import log
-
-class ProtocolError(Exception):
-  "Malformed content from the control socket."
-  pass
+import stem.types
 
-class ControlMessage:
-  """
-  Represents a complete message from the control socket.
-  """
-  
-  def __init__(self, lines, raw_content):
-    if not lines: raise ValueError("Control messages can't be empty")
-    
-    # Parsed control message. This is a list of tuples with the form...
-    # (status code, divider, content)
-    self._lines = lines
-    
-    # String with the unparsed content read from the control port.
-    self._raw_content = raw_content
-  
-  def get_raw_content(self):
-    """
-    Provides the unparsed content read from the control socket.
-    
-    Returns:
-      string of the socket data used to generate this message
-    """
-    
-    return self._raw_content
-  
-  def get_status_code(self, line = -1):
-    """
-    Provides the status code for a line of the message.
-    
-    Arguments:
-      line - line for which the status code is returned
-    
-    Returns:
-      string status code for the line
-    """
-    
-    return self._lines[line][0]
-  
-  def __str__(self):
-    return "\n".join(list(self))
-  
-  def __iter__(self):
-    """
-    Provides the parsed content of the message, not including the status codes
-    and dividers.
-    """
-    
-    for _, _, content in self._lines:
-      for content_line in content.split("\n"):
-        yield content_line
+from stem.util import log
 
 class ControlConnection:
   """
@@ -165,9 +112,9 @@ class ControlConnection:
     
     while self.is_running():
       try:
-        control_message = self._read_message()
+        control_message = stem.types.read_message(self._control_socket_file)
         
-        if control_message.get_status_code() == "650":
+        if control_message.content()[-1][0] == "650":
           # adds this to the event queue and wakes up the handler
           
           self._event_cond.acquire()
@@ -177,78 +124,10 @@ class ControlConnection:
         else:
           # TODO: figure out a good method for terminating the socket thread
           self._reply_queue.put(control_message)
-      except ProtocolError, exc:
+      except stem.types.ProtocolError, exc:
         log.log(log.ERR, "Error reading control socket message: %s" % exc)
         # TODO: terminate?
   
-  def _read_message(self):
-    """
-    Pulls from the control socket until we either have a complete message or
-    encounter a problem.
-    
-    Returns:
-      ControlMessage read from the socket
-    """
-    
-    lines, raw_content = [], ""
-    
-    while True:
-      line = self._control_socket_file.readline()
-      raw_content += line
-      
-      # Tor control lines are of the form...
-      # <status code><divider><content>\r\n
-      #
-      # status code - Three character code for the type of response (defined in
-      #     section 4 of the control-spec).
-      # divider - Single character to indicate if this is mid-reply, data, or
-      #     an end to the message (defined in section 2.3 of the control-spec).
-      # content - The following content is the actual payload of the line.
-      
-      if len(line) < 4:
-        raise ProtocolError("Badly formatted reply line: too short")
-      elif not line.endswith("\r\n"):
-        raise ProtocolError("All lines should end with CRLF")
-      
-      line = line[:-2] # strips off the CRLF
-      status_code, divider, content = line[:3], line[3], line[4:]
-      
-      if divider == "-":
-        # mid-reply line, keep pulling for more content
-        lines.append((status_code, divider, content))
-      elif divider == " ":
-        # end of the message, return the message
-        lines.append((status_code, divider, content))
-        return ControlMessage(lines, raw_content)
-      elif divider == "+":
-        # data entry, all of the following lines belong to the content until we
-        # get a line with just a period
-        
-        while True:
-          line = self._control_socket_file.readline()
-          raw_content += line
-          
-          if not line.endswith("\r\n"):
-            raise ProtocolError("All lines should end with CRLF")
-          elif line == ".\r\n":
-            break # data block termination
-          
-          line = line[:-2] # strips off the CRLF
-          
-          # lines starting with a pariod are escaped by a second period (as per
-          # section 2.4 of the control-spec)
-          if line.startswith(".."): line = line[1:]
-          
-          # appends to previous content, using a newline rather than CRLF
-          # separator (more contentional for multi-line string content outside
-          # the windows world)
-          
-          content += "\n" + line
-        
-        lines.append((status_code, divider, content))
-      else:
-        raise ProtocolError("Unrecognized type '%s': %s" % (divider, line))
-  
   def close(self):
     """
     Terminates the control connection.
diff --git a/stem/types.py b/stem/types.py
index f4583e3..c73a769 100644
--- a/stem/types.py
+++ b/stem/types.py
@@ -1,13 +1,213 @@
 """
 Classes for miscellaneous tor object. This includes...
 
-types.Version - Tor versioning information.
-  * get_version(versionStr)
-    Converts a version string to a types.Version instance.
+ProtocolError - Malformed socket data.
+ControlSocketClosed - Socket terminated.
+
+read_message - Reads a ControlMessage from a control socket.
+ControlMessage - Message from the control socket.
+  |- content - provides the parsed message content
+  |- raw_content - unparsed socket data
+  |- __str__ - content stripped of protocol formatting
+  +- __iter__ - message components stripped of protocol formatting
+
+get_version - Converts a version string to a Version instance.
+Version - Tor versioning information.
+  |- __str__ - string representation
+  +- __cmp__ - compares with another Version
 """
 
 import re
 
+from stem.util import log
+
+class ProtocolError(Exception):
+  "Malformed content from the control socket."
+  pass
+
+class ControlSocketClosed(Exception):
+  "Control socket was closed before completing the message."
+  pass
+
+def read_message(control_file):
+  """
+  Pulls from a control socket until we either have a complete message or
+  encounter a problem.
+  
+  Arguments:
+    control_file - file derived from the control socket (see the socket's
+                   makefile() method for more information)
+  
+  Returns:
+    ControlMessage read from the socket
+  
+  Raises:
+    ProtocolError if the content from the socket is malformed
+    ControlSocketClosed if the socket closes before we receive a complete
+      message
+  """
+  
+  parsed_content, raw_content = [], ""
+  
+  while True:
+    line = control_file.readline()
+    raw_content += line
+    
+    # Parses the tor control lines. These are of the form...
+    # <status code><divider><content>\r\n
+    
+    if len(line) < 4:
+      log.log(log.WARN, "ProtocolError: line too short (%s)" % line)
+      raise ProtocolError("Badly formatted reply line: too short")
+    elif not line.endswith("\r\n"):
+      log.log(log.WARN, "ProtocolError: no CRLF linebreak (%s)" % line)
+      raise ProtocolError("All lines should end with CRLF")
+    
+    line = line[:-2] # strips off the CRLF
+    status_code, divider, content = line[:3], line[3], line[4:]
+    
+    if divider == "-":
+      # mid-reply line, keep pulling for more content
+      parsed_content.append((status_code, divider, content))
+    elif divider == " ":
+      # end of the message, return the message
+      parsed_content.append((status_code, divider, content))
+      
+      log.log(log.DEBUG, "Received message:\n" + raw_content)
+      
+      return ControlMessage(parsed_content, raw_content)
+    elif divider == "+":
+      # data entry, all of the following lines belong to the content until we
+      # get a line with just a period
+      
+      while True:
+        line = control_file.readline()
+        raw_content += line
+        
+        if not line.endswith("\r\n"):
+          log.log(log.WARN, "ProtocolError: no CRLF linebreak for data entry (%s)" % line)
+          raise ProtocolError("All lines should end with CRLF")
+        elif line == ".\r\n":
+          break # data block termination
+        
+        line = line[:-2] # strips off the CRLF
+        
+        # lines starting with a period are escaped by a second period (as per
+        # section 2.4 of the control-spec)
+        if line.startswith(".."): line = line[1:]
+        
+        # appends to previous content, using a newline rather than CRLF
+        # separator (more conventional for multi-line string content outside
+        # the windows world)
+        
+        content += "\n" + line
+      
+      parsed_content.append((status_code, divider, content))
+    else:
+      log.log(log.WARN, "ProtocolError: unrecognized divider type (%s)" % line)
+      raise ProtocolError("Unrecognized type '%s': %s" % (divider, line))
+
+class ControlMessage:
+  """
+  Message from the control socket. This is iterable and can be stringified for
+  individual message components stripped of protocol formatting.
+  """
+  
+  def __init__(self, parsed_content, raw_content):
+    self._parsed_content = parsed_content
+    self._raw_content = raw_content
+  
+  def content(self):
+    """
+    Provides the parsed message content. These are entries of the form...
+    (status_code, divider, content)
+    
+    * status_code - Three character code for the type of response (defined in
+                    section 4 of the control-spec).
+    * divider     - Single character to indicate if this is mid-reply, data, or
+                    an end to the message (defined in section 2.3 of the
+                    control-spec).
+    * content     - The following content is the actual payload of the line.
+    
+    For data entries the content is the full multi-line payload with newline
+    linebreaks and leading periods unescaped.
+    
+    Returns:
+      list of (str, str, str) tuples for the components of this message
+    """
+    
+    return list(self._parsed_content)
+  
+  def raw_content(self):
+    """
+    Provides the unparsed content read from the control socket.
+    
+    Returns:
+      string of the socket data used to generate this message
+    """
+    
+    return self._raw_content
+  
+  def __str__(self):
+    """
+    Content of the message, stripped of status code and divider protocol
+    formatting.
+    """
+    
+    return "\n".join(list(self))
+  
+  def __iter__(self):
+    """
+    Provides the content of the message (stripped of status codes and dividers)
+    for each component of the message. Ie...
+    
+    250+info/names=
+    desc/id/* -- Router descriptors by ID.
+    desc/name/* -- Router descriptors by nickname.
+    .
+    250 OK
+    
+    Would provide two entries...
+    1st - "info/names=
+           desc/id/* -- Router descriptors by ID.
+           desc/name/* -- Router descriptors by nickname."
+    2nd - "OK"
+    """
+    
+    for _, _, content in self._parsed_content:
+      yield content
+
+def get_version(version_str):
+  """
+  Parses a version string, providing back a types.Version instance.
+  
+  Arguments:
+    version_str (str) - representation of a tor version (ex. "0.2.2.23-alpha")
+  
+  Returns:
+    types.Version instance
+  
+  Raises:
+    ValueError if input isn't a valid tor version
+  """
+  
+  if not isinstance(version_str, str):
+    raise ValueError("argument is not a string")
+  
+  m = re.match(r'^([0-9]+).([0-9]+).([0-9]+)(.[0-9]+)?(-\S*)?$', version_str)
+  
+  if m:
+    major, minor, micro, patch, status = m.groups()
+    
+    # The patch and status matches are optional (may be None) and have an extra
+    # preceding period or dash if they exist. Stripping those off.
+    
+    if patch: patch = int(patch[1:])
+    if status: status = status[1:]
+    
+    return Version(int(major), int(minor), int(micro), patch, status)
+  else: raise ValueError("'%s' isn't a properly formatted tor version" % version_str)
+
 class Version:
   """
   Comparable tor version, as per the 'new version' of the version-spec...
@@ -66,34 +266,3 @@ class Version:
     
     return cmp(my_status, other_status)
 
-def get_version(version_str):
-  """
-  Parses a version string, providing back a types.Version instance.
-  
-  Arguments:
-    version_str (str) - representation of a tor version (ex. "0.2.2.23-alpha")
-  
-  Returns:
-    types.Version instance
-  
-  Raises:
-    ValueError if input isn't a valid tor version
-  """
-  
-  if not isinstance(version_str, str):
-    raise ValueError("argument is not a string")
-  
-  m = re.match(r'^([0-9]+).([0-9]+).([0-9]+)(.[0-9]+)?(-\S*)?$', version_str)
-  
-  if m:
-    major, minor, micro, patch, status = m.groups()
-    
-    # The patch and status matches are optional (may be None) and have an extra
-    # proceeding period or dash if they exist. Stripping those off.
-    
-    if patch: patch = int(patch[1:])
-    if status: status = status[1:]
-    
-    return Version(int(major), int(minor), int(micro), patch, status)
-  else: raise ValueError("'%s' isn't a properly formatted tor version" % version_str)
-





More information about the tor-commits mailing list