commit 92f78d459497dd3c0d688c040e06d6cc6668c43c
Author: Damian Johnson <atagar(a)torproject.org>
Date: Mon Oct 10 19:54:30 2011 -0700
Moving ControlMessage and parsing into types
The ControlMessage is a standalone class so moving it into types. This is both
appropriate for the types module and will make testing easier.
---
stem/connection.py | 131 +----------------------------
stem/types.py | 237 ++++++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 208 insertions(+), 160 deletions(-)
diff --git a/stem/connection.py b/stem/connection.py
index cede227..5e1fe5e 100644
--- a/stem/connection.py
+++ b/stem/connection.py
@@ -6,62 +6,9 @@ import Queue
import socket
import threading
-from stem.util import log
-
-class ProtocolError(Exception):
- "Malformed content from the control socket."
- pass
+import stem.types
-class ControlMessage:
- """
- Represents a complete message from the control socket.
- """
-
- def __init__(self, lines, raw_content):
- if not lines: raise ValueError("Control messages can't be empty")
-
- # Parsed control message. This is a list of tuples with the form...
- # (status code, divider, content)
- self._lines = lines
-
- # String with the unparsed content read from the control port.
- self._raw_content = raw_content
-
- def get_raw_content(self):
- """
- Provides the unparsed content read from the control socket.
-
- Returns:
- string of the socket data used to generate this message
- """
-
- return self._raw_content
-
- def get_status_code(self, line = -1):
- """
- Provides the status code for a line of the message.
-
- Arguments:
- line - line for which the status code is returned
-
- Returns:
- string status code for the line
- """
-
- return self._lines[line][0]
-
- def __str__(self):
- return "\n".join(list(self))
-
- def __iter__(self):
- """
- Provides the parsed content of the message, not including the status codes
- and dividers.
- """
-
- for _, _, content in self._lines:
- for content_line in content.split("\n"):
- yield content_line
+from stem.util import log
class ControlConnection:
"""
@@ -165,9 +112,9 @@ class ControlConnection:
while self.is_running():
try:
- control_message = self._read_message()
+ control_message = stem.types.read_message(self._control_socket_file)
- if control_message.get_status_code() == "650":
+ if control_message.content()[-1][0] == "650":
# adds this to the event queue and wakes up the handler
self._event_cond.acquire()
@@ -177,78 +124,10 @@ class ControlConnection:
else:
# TODO: figure out a good method for terminating the socket thread
self._reply_queue.put(control_message)
- except ProtocolError, exc:
+ except stem.types.ProtocolError, exc:
log.log(log.ERR, "Error reading control socket message: %s" % exc)
# TODO: terminate?
- def _read_message(self):
- """
- Pulls from the control socket until we either have a complete message or
- encounter a problem.
-
- Returns:
- ControlMessage read from the socket
- """
-
- lines, raw_content = [], ""
-
- while True:
- line = self._control_socket_file.readline()
- raw_content += line
-
- # Tor control lines are of the form...
- # <status code><divider><content>\r\n
- #
- # status code - Three character code for the type of response (defined in
- # section 4 of the control-spec).
- # divider - Single character to indicate if this is mid-reply, data, or
- # an end to the message (defined in section 2.3 of the control-spec).
- # content - The following content is the actual payload of the line.
-
- if len(line) < 4:
- raise ProtocolError("Badly formatted reply line: too short")
- elif not line.endswith("\r\n"):
- raise ProtocolError("All lines should end with CRLF")
-
- line = line[:-2] # strips off the CRLF
- status_code, divider, content = line[:3], line[3], line[4:]
-
- if divider == "-":
- # mid-reply line, keep pulling for more content
- lines.append((status_code, divider, content))
- elif divider == " ":
- # end of the message, return the message
- lines.append((status_code, divider, content))
- return ControlMessage(lines, raw_content)
- elif divider == "+":
- # data entry, all of the following lines belong to the content until we
- # get a line with just a period
-
- while True:
- line = self._control_socket_file.readline()
- raw_content += line
-
- if not line.endswith("\r\n"):
- raise ProtocolError("All lines should end with CRLF")
- elif line == ".\r\n":
- break # data block termination
-
- line = line[:-2] # strips off the CRLF
-
- # lines starting with a pariod are escaped by a second period (as per
- # section 2.4 of the control-spec)
- if line.startswith(".."): line = line[1:]
-
- # appends to previous content, using a newline rather than CRLF
- # separator (more contentional for multi-line string content outside
- # the windows world)
-
- content += "\n" + line
-
- lines.append((status_code, divider, content))
- else:
- raise ProtocolError("Unrecognized type '%s': %s" % (divider, line))
-
def close(self):
"""
Terminates the control connection.
diff --git a/stem/types.py b/stem/types.py
index f4583e3..c73a769 100644
--- a/stem/types.py
+++ b/stem/types.py
@@ -1,13 +1,213 @@
"""
Classes for miscellaneous tor object. This includes...
-types.Version - Tor versioning information.
- * get_version(versionStr)
- Converts a version string to a types.Version instance.
+ProtocolError - Malformed socket data.
+ControlSocketClosed - Socket terminated.
+
+read_message - Reads a ControlMessage from a control socket.
+ControlMessage - Message from the control socket.
+ |- content - provides the parsed message content
+ |- raw_content - unparsed socket data
+ |- __str__ - content stripped of protocol formatting
+ +- __iter__ - message components stripped of protocol formatting
+
+get_version - Converts a version string to a Version instance.
+Version - Tor versioning information.
+ |- __str__ - string representation
+ +- __cmp__ - compares with another Version
"""
import re
+from stem.util import log
+
+class ProtocolError(Exception):
+ "Malformed content from the control socket."
+ pass
+
+class ControlSocketClosed(Exception):
+ "Control socket was closed before completing the message."
+ pass
+
+def read_message(control_file):
+ """
+ Pulls from a control socket until we either have a complete message or
+ encounter a problem.
+
+ Arguments:
+ control_file - file derived from the control socket (see the socket's
+ makefile() method for more information)
+
+ Returns:
+ ControlMessage read from the socket
+
+ Raises:
+ ProtocolError if the content from the socket is malformed
+ ControlSocketClosed if the socket closes before we receive a complete
+ message
+ """
+
+ parsed_content, raw_content = [], ""
+
+ while True:
+ line = control_file.readline()
+ raw_content += line
+
+ # Parses the tor control lines. These are of the form...
+ # <status code><divider><content>\r\n
+
+ if len(line) < 4:
+ log.log(log.WARN, "ProtocolError: line too short (%s)" % line)
+ raise ProtocolError("Badly formatted reply line: too short")
+ elif not line.endswith("\r\n"):
+ log.log(log.WARN, "ProtocolError: no CRLF linebreak (%s)" % line)
+ raise ProtocolError("All lines should end with CRLF")
+
+ line = line[:-2] # strips off the CRLF
+ status_code, divider, content = line[:3], line[3], line[4:]
+
+ if divider == "-":
+ # mid-reply line, keep pulling for more content
+ parsed_content.append((status_code, divider, content))
+ elif divider == " ":
+ # end of the message, return the message
+ parsed_content.append((status_code, divider, content))
+
+ log.log(log.DEBUG, "Received message:\n" + raw_content)
+
+ return ControlMessage(parsed_content, raw_content)
+ elif divider == "+":
+ # data entry, all of the following lines belong to the content until we
+ # get a line with just a period
+
+ while True:
+ line = control_file.readline()
+ raw_content += line
+
+ if not line.endswith("\r\n"):
+ log.log(log.WARN, "ProtocolError: no CRLF linebreak for data entry (%s)" % line)
+ raise ProtocolError("All lines should end with CRLF")
+ elif line == ".\r\n":
+ break # data block termination
+
+ line = line[:-2] # strips off the CRLF
+
+ # lines starting with a period are escaped by a second period (as per
+ # section 2.4 of the control-spec)
+ if line.startswith(".."): line = line[1:]
+
+ # appends to previous content, using a newline rather than CRLF
+ # separator (more conventional for multi-line string content outside
+ # the windows world)
+
+ content += "\n" + line
+
+ parsed_content.append((status_code, divider, content))
+ else:
+ log.log(log.WARN, "ProtocolError: unrecognized divider type (%s)" % line)
+ raise ProtocolError("Unrecognized type '%s': %s" % (divider, line))
+
+class ControlMessage:
+ """
+ Message from the control socket. This is iterable and can be stringified for
+ individual message components stripped of protocol formatting.
+ """
+
+ def __init__(self, parsed_content, raw_content):
+ self._parsed_content = parsed_content
+ self._raw_content = raw_content
+
+ def content(self):
+ """
+ Provides the parsed message content. These are entries of the form...
+ (status_code, divider, content)
+
+ * status_code - Three character code for the type of response (defined in
+ section 4 of the control-spec).
+ * divider - Single character to indicate if this is mid-reply, data, or
+ an end to the message (defined in section 2.3 of the
+ control-spec).
+ * content - The following content is the actual payload of the line.
+
+ For data entries the content is the full multi-line payload with newline
+ linebreaks and leading periods unescaped.
+
+ Returns:
+ list of (str, str, str) tuples for the components of this message
+ """
+
+ return list(self._parsed_content)
+
+ def raw_content(self):
+ """
+ Provides the unparsed content read from the control socket.
+
+ Returns:
+ string of the socket data used to generate this message
+ """
+
+ return self._raw_content
+
+ def __str__(self):
+ """
+ Content of the message, stripped of status code and divider protocol
+ formatting.
+ """
+
+ return "\n".join(list(self))
+
+ def __iter__(self):
+ """
+ Provides the content of the message (stripped of status codes and dividers)
+ for each component of the message. Ie...
+
+ 250+info/names=
+ desc/id/* -- Router descriptors by ID.
+ desc/name/* -- Router descriptors by nickname.
+ .
+ 250 OK
+
+ Would provide two entries...
+ 1st - "info/names=
+ desc/id/* -- Router descriptors by ID.
+ desc/name/* -- Router descriptors by nickname."
+ 2nd - "OK"
+ """
+
+ for _, _, content in self._parsed_content:
+ yield content
+
+def get_version(version_str):
+ """
+ Parses a version string, providing back a types.Version instance.
+
+ Arguments:
+ version_str (str) - representation of a tor version (ex. "0.2.2.23-alpha")
+
+ Returns:
+ types.Version instance
+
+ Raises:
+ ValueError if input isn't a valid tor version
+ """
+
+ if not isinstance(version_str, str):
+ raise ValueError("argument is not a string")
+
+ m = re.match(r'^([0-9]+).([0-9]+).([0-9]+)(.[0-9]+)?(-\S*)?$', version_str)
+
+ if m:
+ major, minor, micro, patch, status = m.groups()
+
+ # The patch and status matches are optional (may be None) and have an extra
+ # preceding period or dash if they exist. Stripping those off.
+
+ if patch: patch = int(patch[1:])
+ if status: status = status[1:]
+
+ return Version(int(major), int(minor), int(micro), patch, status)
+ else: raise ValueError("'%s' isn't a properly formatted tor version" % version_str)
+
class Version:
"""
Comparable tor version, as per the 'new version' of the version-spec...
@@ -66,34 +266,3 @@ class Version:
return cmp(my_status, other_status)
-def get_version(version_str):
- """
- Parses a version string, providing back a types.Version instance.
-
- Arguments:
- version_str (str) - representation of a tor version (ex. "0.2.2.23-alpha")
-
- Returns:
- types.Version instance
-
- Raises:
- ValueError if input isn't a valid tor version
- """
-
- if not isinstance(version_str, str):
- raise ValueError("argument is not a string")
-
- m = re.match(r'^([0-9]+).([0-9]+).([0-9]+)(.[0-9]+)?(-\S*)?$', version_str)
-
- if m:
- major, minor, micro, patch, status = m.groups()
-
- # The patch and status matches are optional (may be None) and have an extra
- # proceeding period or dash if they exist. Stripping those off.
-
- if patch: patch = int(patch[1:])
- if status: status = status[1:]
-
- return Version(int(major), int(minor), int(micro), patch, status)
- else: raise ValueError("'%s' isn't a properly formatted tor version" % version_str)
-