[tor-commits] [stem/master] Use BytesIO to collect socket content

21 Sep 2017

commit 2bbe4cd42c09de406335fca6abc96bcbde87fc05
Author: Damian Johnson <atagar@torproject.org>
Date:   Tue Sep 19 19:08:59 2017 -0700

    Use BytesIO to collect socket content
    
    Making an array out of the lines is unnecessary. BytesIO is specifically
    designed for this and is more efficient.
---
 stem/socket.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/stem/socket.py b/stem/socket.py
index 9817c129..96e448bc 100644
--- a/stem/socket.py
+++ b/stem/socket.py
@@ -69,6 +69,7 @@ Tor...
 
 from __future__ import absolute_import
 
+import io
 import re
 import socket
 import threading
@@ -526,11 +527,12 @@ def recv_message(control_file):
       a complete message
   """
 
-  parsed_content, raw_content = [], []
+  parsed_content, raw_content = [], io.BytesIO()
 
   while True:
     try:
       line = control_file.readline()
+      raw_content.write(line)
     except AttributeError:
       # if the control_file has been closed then we will receive:
       # AttributeError: 'NoneType' object has no attribute 'recv'
@@ -549,8 +551,6 @@ def recv_message(control_file):
       log.info(ERROR_MSG % ('SocketClosed', 'received exception "%s"' % exc))
       raise stem.SocketClosed(exc)
 
-    raw_content.append(line)
-
     # Parses the tor control lines. These are of the form...
     # <status code><divider><content>\r\n
 
@@ -572,7 +572,6 @@ def recv_message(control_file):
 
     line = line[:-2]  # strips off the CRLF
     status_code, divider, content = line[:3], line[3:4], line[4:]
-    content_lines = [content]
 
     if stem.prereq.is_python_3():
       status_code = stem.util.str_tools._to_unicode(status_code)
@@ -585,10 +584,8 @@ def recv_message(control_file):
       # end of the message, return the message
       parsed_content.append((status_code, divider, content))
 
-      raw_content_str = b''.join(raw_content)
-
       if log.is_tracing():
-        log_message = stem.util.str_tools._to_unicode(raw_content_str.replace(b'\r\n', b'\n').rstrip())
+        log_message = stem.util.str_tools._to_unicode(raw_content.getvalue().replace(b'\r\n', b'\n').rstrip())
         log_message_lines = log_message.split('\n')
 
         if TRUNCATE_LOGS and len(log_message_lines) > TRUNCATE_LOGS:
@@ -599,22 +596,24 @@ def recv_message(control_file):
         else:
           log.trace('Received from tor: %s' % log_message.replace('\n', '\\n'))
 
-      return stem.response.ControlMessage(parsed_content, raw_content_str)
+      return stem.response.ControlMessage(parsed_content, raw_content.getvalue())
     elif divider == '+':
       # data entry, all of the following lines belong to the content until we
       # get a line with just a period
 
+      content_block = io.BytesIO()
+      content_block.write(content)
+
       while True:
         try:
-          line = stem.util.str_tools._to_bytes(control_file.readline())
+          line = control_file.readline()
+          raw_content.write(line)
         except socket.error as exc:
-          log.info(ERROR_MSG % ('SocketClosed', 'received an exception while mid-way through a data reply (exception: "%s", read content: "%s")' % (exc, log.escape(b''.join(raw_content)))))
+          log.info(ERROR_MSG % ('SocketClosed', 'received an exception while mid-way through a data reply (exception: "%s", read content: "%s")' % (exc, log.escape(raw_content.getvalue()))))
           raise stem.SocketClosed(exc)
 
-        raw_content.append(line)
-
         if not line.endswith(b'\r\n'):
-          log.info(ERROR_MSG % ('ProtocolError', 'CRLF linebreaks missing from a data reply, "%s"' % log.escape(b''.join(raw_content))))
+          log.info(ERROR_MSG % ('ProtocolError', 'CRLF linebreaks missing from a data reply, "%s"' % log.escape(raw_content.getvalue())))
           raise stem.ProtocolError('All lines should end with CRLF')
         elif line == b'.\r\n':
           break  # data block termination
@@ -627,12 +626,12 @@ def recv_message(control_file):
         if line.startswith(b'..'):
           line = line[1:]
 
-        content_lines.append(line)
+        content_block.write(b'\n' + line)
 
       # joins the content using a newline rather than CRLF separator (more
       # conventional for multi-line string content outside the windows world)
 
-      parsed_content.append((status_code, divider, b'\n'.join(content_lines)))
+      parsed_content.append((status_code, divider, content_block.getvalue()))
     else:
       # this should never be reached due to the prefix regex, but might as well
       # be safe...

    

[tor-commits] [stem/master] Use BytesIO to collect socket content

atagar@torproject.org