commit 92f78d459497dd3c0d688c040e06d6cc6668c43c Author: Damian Johnson <atagar@torproject.org> Date: Mon Oct 10 19:54:30 2011 -0700
Moving ControlMessage and parsing into types
ControlMessage is a standalone class, so this moves it (together with the message parsing) into the types module. This is both a natural fit for the types module and will make testing easier. --- stem/connection.py | 131 +---------------------------- stem/types.py | 237 ++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 208 insertions(+), 160 deletions(-)
diff --git a/stem/connection.py b/stem/connection.py index cede227..5e1fe5e 100644 --- a/stem/connection.py +++ b/stem/connection.py @@ -6,62 +6,9 @@ import Queue import socket import threading
-from stem.util import log - -class ProtocolError(Exception): - "Malformed content from the control socket." - pass +import stem.types
-class ControlMessage: - """ - Represents a complete message from the control socket. - """ - - def __init__(self, lines, raw_content): - if not lines: raise ValueError("Control messages can't be empty") - - # Parsed control message. This is a list of tuples with the form... - # (status code, divider, content) - self._lines = lines - - # String with the unparsed content read from the control port. - self._raw_content = raw_content - - def get_raw_content(self): - """ - Provides the unparsed content read from the control socket. - - Returns: - string of the socket data used to generate this message - """ - - return self._raw_content - - def get_status_code(self, line = -1): - """ - Provides the status code for a line of the message. - - Arguments: - line - line for which the status code is returned - - Returns: - string status code for the line - """ - - return self._lines[line][0] - - def __str__(self): - return "\n".join(list(self)) - - def __iter__(self): - """ - Provides the parsed content of the message, not including the status codes - and dividers. - """ - - for _, _, content in self._lines: - for content_line in content.split("\n"): - yield content_line +from stem.util import log
class ControlConnection: """ @@ -165,9 +112,9 @@ class ControlConnection:
while self.is_running(): try: - control_message = self._read_message() + control_message = stem.types.read_message(self._control_socket_file)
- if control_message.get_status_code() == "650": + if control_message.content()[-1][0] == "650": # adds this to the event queue and wakes up the handler
self._event_cond.acquire() @@ -177,78 +124,10 @@ class ControlConnection: else: # TODO: figure out a good method for terminating the socket thread self._reply_queue.put(control_message) - except ProtocolError, exc: + except stem.types.ProtocolError, exc: log.log(log.ERR, "Error reading control socket message: %s" % exc) # TODO: terminate?
- def _read_message(self): - """ - Pulls from the control socket until we either have a complete message or - encounter a problem. - - Returns: - ControlMessage read from the socket - """ - - lines, raw_content = [], "" - - while True: - line = self._control_socket_file.readline() - raw_content += line - - # Tor control lines are of the form... - # <status code><divider><content>\r\n - # - # status code - Three character code for the type of response (defined in - # section 4 of the control-spec). - # divider - Single character to indicate if this is mid-reply, data, or - # an end to the message (defined in section 2.3 of the control-spec). - # content - The following content is the actual payload of the line. - - if len(line) < 4: - raise ProtocolError("Badly formatted reply line: too short") - elif not line.endswith("\r\n"): - raise ProtocolError("All lines should end with CRLF") - - line = line[:-2] # strips off the CRLF - status_code, divider, content = line[:3], line[3], line[4:] - - if divider == "-": - # mid-reply line, keep pulling for more content - lines.append((status_code, divider, content)) - elif divider == " ": - # end of the message, return the message - lines.append((status_code, divider, content)) - return ControlMessage(lines, raw_content) - elif divider == "+": - # data entry, all of the following lines belong to the content until we - # get a line with just a period - - while True: - line = self._control_socket_file.readline() - raw_content += line - - if not line.endswith("\r\n"): - raise ProtocolError("All lines should end with CRLF") - elif line == ".\r\n": - break # data block termination - - line = line[:-2] # strips off the CRLF - - # lines starting with a pariod are escaped by a second period (as per - # section 2.4 of the control-spec) - if line.startswith(".."): line = line[1:] - - # appends to previous content, using a newline rather than CRLF - # separator (more contentional for multi-line string content outside - # the windows 
world) - - content += "\n" + line - - lines.append((status_code, divider, content)) - else: - raise ProtocolError("Unrecognized type '%s': %s" % (divider, line)) - def close(self): """ Terminates the control connection. diff --git a/stem/types.py b/stem/types.py index f4583e3..c73a769 100644 --- a/stem/types.py +++ b/stem/types.py @@ -1,13 +1,213 @@ """ Classes for miscellaneous tor object. This includes...
-types.Version - Tor versioning information. - * get_version(versionStr) - Converts a version string to a types.Version instance. +ProtocolError - Malformed socket data. +ControlSocketClosed - Socket terminated. + +read_message - Reads a ControlMessage from a control socket. +ControlMessage - Message from the control socket. + |- content - provides the parsed message content + |- raw_content - unparsed socket data + |- __str__ - content stripped of protocol formatting + +- __iter__ - message components stripped of protocol formatting + +get_version - Converts a version string to a Version instance. +Version - Tor versioning information. + |- __str__ - string representation + +- __cmp__ - compares with another Version """
import re
+from stem.util import log + +class ProtocolError(Exception): + "Malformed content from the control socket." + pass + +class ControlSocketClosed(Exception): + "Control socket was closed before completing the message." + pass + +def read_message(control_file): + """ + Pulls from a control socket until we either have a complete message or + encounter a problem. + + Arguments: + control_file - file derived from the control socket (see the socket's + makefile() method for more information) + + Returns: + ControlMessage read from the socket + + Raises: + ProtocolError the content from the socket is malformed + ControlSocketClosed if the socket closes before we receive a complete + message + """ + + parsed_content, raw_content = [], "" + + while True: + line = control_file.readline() + raw_content += line + + # Parses the tor control lines. These are of the form... + # <status code><divider><content>\r\n + + if len(line) < 4: + log.log(log.WARN, "ProtocolError: line too short (%s)" % line) + raise ProtocolError("Badly formatted reply line: too short") + elif not line.endswith("\r\n"): + log.log(log.WARN, "ProtocolError: no CRLF linebreak (%s)" % line) + raise ProtocolError("All lines should end with CRLF") + + line = line[:-2] # strips off the CRLF + status_code, divider, content = line[:3], line[3], line[4:] + + if divider == "-": + # mid-reply line, keep pulling for more content + parsed_content.append((status_code, divider, content)) + elif divider == " ": + # end of the message, return the message + parsed_content.append((status_code, divider, content)) + + log.log(log.DEBUG, "Received message:\n" + raw_content) + + return ControlMessage(parsed_content, raw_content) + elif divider == "+": + # data entry, all of the following lines belong to the content until we + # get a line with just a period + + while True: + line = control_file.readline() + raw_content += line + + if not line.endswith("\r\n"): + log.log(log.WARN, "ProtocolError: no CRLF linebreak for data entry 
(%s)" % line) + raise ProtocolError("All lines should end with CRLF") + elif line == ".\r\n": + break # data block termination + + line = line[:-2] # strips off the CRLF + + # lines starting with a pariod are escaped by a second period (as per + # section 2.4 of the control-spec) + if line.startswith(".."): line = line[1:] + + # appends to previous content, using a newline rather than CRLF + # separator (more contentional for multi-line string content outside + # the windows world) + + content += "\n" + line + + parsed_content.append((status_code, divider, content)) + else: + log.log(log.WARN, "ProtocolError: unrecognized divider type (%s)" % line) + raise ProtocolError("Unrecognized type '%s': %s" % (divider, line)) + +class ControlMessage: + """ + Message from the control socket. This is iterable and can be stringified for + individual message components stripped of protocol formatting. + """ + + def __init__(self, parsed_content, raw_content): + self._parsed_content = parsed_content + self._raw_content = raw_content + + def content(self): + """ + Provides the parsed message content. These are entries of the form... + (status_code, divider, content) + + * status_code - Three character code for the type of response (defined in + section 4 of the control-spec). + * divider - Single character to indicate if this is mid-reply, data, or + an end to the message (defined in section 2.3 of the + control-spec). + * content - The following content is the actual payload of the line. + + For data entries the content is the full multi-line payload with newline + linebreaks and leading periods unescaped. + + Returns: + list of (str, str, str) tuples for the components of this message + """ + + return list(self._parsed_content) + + def raw_content(self): + """ + Provides the unparsed content read from the control socket. 
+ + Returns: + string of the socket data used to generate this message + """ + + return self._raw_content + + def __str__(self): + """ + Content of the message, stripped of status code and divider protocol + formatting. + """ + + return "\n".join(list(self)) + + def __iter__(self): + """ + Provides the content of the message (stripped of status codes and dividers) + for each component of the message. Ie... + + 250+info/names= + desc/id/* -- Router descriptors by ID. + desc/name/* -- Router descriptors by nickname. + . + 250 OK + + Would provide two entries... + 1st - "info/names= + desc/id/* -- Router descriptors by ID. + desc/name/* -- Router descriptors by nickname." + 2nd - "OK" + """ + + for _, _, content in self._parsed_content: + yield content + +def get_version(version_str): + """ + Parses a version string, providing back a types.Version instance. + + Arguments: + version_str (str) - representation of a tor version (ex. "0.2.2.23-alpha") + + Returns: + types.Version instance + + Raises: + ValueError if input isn't a valid tor version + """ + + if not isinstance(version_str, str): + raise ValueError("argument is not a string") + + m = re.match(r'^([0-9]+).([0-9]+).([0-9]+)(.[0-9]+)?(-\S*)?$', version_str) + + if m: + major, minor, micro, patch, status = m.groups() + + # The patch and status matches are optional (may be None) and have an extra + # proceeding period or dash if they exist. Stripping those off. + + if patch: patch = int(patch[1:]) + if status: status = status[1:] + + return Version(int(major), int(minor), int(micro), patch, status) + else: raise ValueError("'%s' isn't a properly formatted tor version" % version_str) + class Version: """ Comparable tor version, as per the 'new version' of the version-spec... @@ -66,34 +266,3 @@ class Version:
return cmp(my_status, other_status)
-def get_version(version_str): - """ - Parses a version string, providing back a types.Version instance. - - Arguments: - version_str (str) - representation of a tor version (ex. "0.2.2.23-alpha") - - Returns: - types.Version instance - - Raises: - ValueError if input isn't a valid tor version - """ - - if not isinstance(version_str, str): - raise ValueError("argument is not a string") - - m = re.match(r'^([0-9]+).([0-9]+).([0-9]+)(.[0-9]+)?(-\S*)?$', version_str) - - if m: - major, minor, micro, patch, status = m.groups() - - # The patch and status matches are optional (may be None) and have an extra - # proceeding period or dash if they exist. Stripping those off. - - if patch: patch = int(patch[1:]) - if status: status = status[1:] - - return Version(int(major), int(minor), int(micro), patch, status) - else: raise ValueError("'%s' isn't a properly formatted tor version" % version_str) -