commit b12e9c01c318863f4e4cddacf341abfd197ff412 Author: Damian Johnson atagar@torproject.org Date: Wed Mar 20 18:28:55 2013 -0700
Storing raw descriptor content as bytes
Tor descriptors are byte strings. While most content is ASCII (and hence maps to UTF8) there are a couple of exceptions: the server descriptor's contact and platform lines, which are arbitrary byte content. Usually this is unicode but on occasion it isn't (14 of 3044 in my old consensus cache).
In those cases we converted the content to unicode and replaced the undecodable characters. This in turn caused our _verify_digest() check to report the descriptor content as being malformed if the user had pycrypto installed.
Changing the raw contents the Descriptor class tracks to be bytes instead of unicode. This actually leads to far more elegant code, letting us drop the UnicodeReader which was always a pretty gross hack.
This breaks python 2.5 compatibility by using the io.BytesIO class. Presently I'm checking with tor-dev@ to see if anyone objects to dropping python 2.5 support entirely...
https://lists.torproject.org/pipermail/tor-dev/2013-March/004551.html
If the answer is...
* yes, drop support: we'll replace test.mocking.BytesBuffer with io.BytesIO (they both do the exact same thing)
* no, keep support: we'll need to move test.mocking.BytesBuffer to a util and use it instead of io.BytesIO --- stem/control.py | 7 +- stem/descriptor/__init__.py | 84 +++++-------------- stem/descriptor/extrainfo_descriptor.py | 5 +- stem/descriptor/microdescriptor.py | 11 ++- stem/descriptor/networkstatus.py | 29 ++++--- stem/descriptor/router_status_entry.py | 5 +- stem/descriptor/server_descriptor.py | 15 ++-- stem/response/events.py | 6 +- test/integ/descriptor/microdescriptor.py | 12 ++-- test/integ/descriptor/server_descriptor.py | 2 +- test/mocking.py | 87 ++++++++++++++++--- test/unit/descriptor/extrainfo_descriptor.py | 4 +- test/unit/descriptor/microdescriptor.py | 10 +- .../descriptor/networkstatus/bridge_document.py | 4 +- .../networkstatus/directory_authority.py | 12 ++-- test/unit/descriptor/networkstatus/document_v3.py | 34 ++++---- .../descriptor/networkstatus/key_certificate.py | 4 +- test/unit/descriptor/router_status_entry.py | 16 ++-- test/unit/descriptor/server_descriptor.py | 28 +++--- test/unit/tutorial.py | 4 +- 20 files changed, 200 insertions(+), 179 deletions(-)
diff --git a/stem/control.py b/stem/control.py index d64373b..4d91ecd 100644 --- a/stem/control.py +++ b/stem/control.py @@ -136,6 +136,7 @@ providing its own for interacting at a higher level.
from __future__ import with_statement
+import io import os import Queue import StringIO @@ -156,7 +157,7 @@ import stem.util.tor_tools import stem.version
from stem import UNDEFINED, CircStatus, Signal -from stem.util import log +from stem.util import log, str_tools
# state changes a control socket can have
@@ -1120,7 +1121,7 @@ class Controller(BaseController):
desc_content = self.get_info("desc/all-recent")
- for desc in stem.descriptor.server_descriptor._parse_file(StringIO.StringIO(desc_content)): + for desc in stem.descriptor.server_descriptor._parse_file(io.BytesIO(str_tools._to_bytes(desc_content))): yield desc except Exception, exc: if default == UNDEFINED: @@ -1190,7 +1191,7 @@ class Controller(BaseController): desc_content = self.get_info("ns/all")
desc_iterator = stem.descriptor.router_status_entry._parse_file( - StringIO.StringIO(desc_content), + io.BytesIO(str_tools._to_bytes(desc_content)), True, entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2, ) diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 4f0a596..40c1049 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -143,10 +143,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen import stem.descriptor.extrainfo_descriptor import stem.descriptor.networkstatus
- # attempt to read content as unicode - - descriptor_file = _UnicodeReader(descriptor_file) - # The tor descriptor specifications do not provide a reliable method for # identifying a descriptor file's type and version so we need to guess # based on its filename. Metrics descriptors, however, can be identified @@ -154,13 +150,14 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen # https://trac.torproject.org/5651
initial_position = descriptor_file.tell() - first_line = descriptor_file.readline().strip() + first_line = stem.util.str_tools._to_unicode(descriptor_file.readline().strip()) metrics_header_match = re.match("^@type (\S+) (\d+).(\d+)$", first_line)
if not metrics_header_match: descriptor_file.seek(initial_position)
- filename = '<undefined>' if descriptor_file.name is None else os.path.basename(descriptor_file.name) + descriptor_path = getattr(descriptor_file, 'name', None) + filename = '<undefined>' if descriptor_path is None else os.path.basename(descriptor_file.name) file_parser = None
if descriptor_type is not None: @@ -192,8 +189,8 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
if file_parser: for desc in file_parser(descriptor_file): - if descriptor_file.name is not None: - desc._set_path(os.path.abspath(descriptor_file.name)) + if descriptor_path is not None: + desc._set_path(os.path.abspath(descriptor_path))
yield desc
@@ -286,6 +283,17 @@ class Descriptor(object):
return self._archive_path
+ def get_bytes(self): + """ + Provides the ASCII **bytes** of the descriptor. This only differs from + **str()** if you're running python 3.x, in which case **str()** provides a + **unicode** string. + + :returns: **bytes** for the descriptor's contents + """ + + return self._raw_contents + def get_unrecognized_lines(self): """ Provides a list of lines that were either ignored or had data that we did @@ -305,61 +313,9 @@ class Descriptor(object):
def __str__(self): if stem.prereq.is_python_3(): - return self._raw_contents + return stem.util.str_tools._to_unicode(self._raw_contents) else: - return str(stem.util.str_tools._to_bytes(self._raw_contents)) - - -class _UnicodeReader(object): - """ - File-like object that wraps another file. This replaces read ASCII bytes with - unicode content. This only supports read operations. - """ - - def __init__(self, wrapped_file): - self.wrapped_file = wrapped_file - self.name = getattr(wrapped_file, 'name', None) - - def close(self): - return self.wrapped_file.close() - - def getvalue(self): - return self.wrapped_file.getvalue() - - def isatty(self): - return self.wrapped_file.isatty() - - def next(self): - return self.wrapped_file.next() - - def read(self, n = -1): - return stem.util.str_tools._to_unicode(self.wrapped_file.read(n)) - - def readline(self): - return stem.util.str_tools._to_unicode(self.wrapped_file.readline()) - - def readlines(self, sizehint = None): - # being careful to do in-place conversion so we don't accidently double our - # memory usage - - if sizehint is not None: - results = self.wrapped_file.readlines(sizehint) - else: - results = self.wrapped_file.readlines() - - for i in xrange(len(results)): - results[i] = stem.util.str_tools._to_unicode(results[i]) - - return results - - def seek(self, pos, mode = None): - if mode is not None: - return self.wrapped_file.seek(pos, mode) - else: - return self.wrapped_file.seek(pos) - - def tell(self): - return self.wrapped_file.tell() + return self._raw_contents
def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False): @@ -402,11 +358,11 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi if not line: break # EOF
- line_match = KEYWORD_LINE.match(line) + line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(line))
if not line_match: # no spaces or tabs in the line - line_keyword = line.strip() + line_keyword = stem.util.str_tools._to_unicode(line.strip()) else: line_keyword = line_match.groups()[0]
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index 5f1c423..5e6ddd0 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -164,9 +164,9 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
if extrainfo_content: if is_bridge: - yield BridgeExtraInfoDescriptor("".join(extrainfo_content), validate) + yield BridgeExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate) else: - yield RelayExtraInfoDescriptor("".join(extrainfo_content), validate) + yield RelayExtraInfoDescriptor(bytes.join(b"", extrainfo_content), validate) else: break # done parsing file
@@ -322,6 +322,7 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor): """
super(ExtraInfoDescriptor, self).__init__(raw_contents) + raw_contents = stem.util.str_tools._to_unicode(raw_contents)
self.nickname = None self.fingerprint = None diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py index dc818aa..324ee32 100644 --- a/stem/descriptor/microdescriptor.py +++ b/stem/descriptor/microdescriptor.py @@ -69,7 +69,7 @@ def _parse_file(descriptor_file, validate = True):
if not line: break # EOF - elif line.startswith("@") or line.startswith("onion-key"): + elif line.startswith(b"@") or line.startswith(b"onion-key"): descriptor_file.seek(last_position) break else: @@ -77,9 +77,9 @@ def _parse_file(descriptor_file, validate = True):
if descriptor_lines: # strip newlines from annotations - annotations = map(unicode.strip, annotations) + annotations = map(bytes.strip, annotations)
- descriptor_text = "".join(descriptor_lines) + descriptor_text = bytes.join(b"", descriptor_lines)
yield Microdescriptor(descriptor_text, validate, annotations) else: @@ -105,6 +105,7 @@ class Microdescriptor(stem.descriptor.Descriptor):
def __init__(self, raw_contents, validate = True, annotations = None): super(Microdescriptor, self).__init__(raw_contents) + raw_contents = stem.util.str_tools._to_unicode(raw_contents)
self.onion_key = None self.ntor_onion_key = None @@ -143,8 +144,8 @@ class Microdescriptor(stem.descriptor.Descriptor): annotation_dict = {}
for line in self._annotation_lines: - if " " in line: - key, value = line.split(" ", 1) + if b" " in line: + key, value = line.split(b" ", 1) annotation_dict[key] = value else: annotation_dict[line] = None diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py index 74431d1..c32a7ce 100644 --- a/stem/descriptor/networkstatus.py +++ b/stem/descriptor/networkstatus.py @@ -47,7 +47,7 @@ For more information see :func:`~stem.descriptor.__init__.DocumentHandler`... """
import datetime -import StringIO +import io
import stem.descriptor import stem.descriptor.router_status_entry @@ -201,7 +201,7 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde routers_end = document_file.tell()
footer = document_file.readlines() - document_content = "".join(header + footer) + document_content = bytes.join(b"", header + footer)
if document_handler == stem.descriptor.DocumentHandler.BARE_DOCUMENT: yield document_type(document_content, validate) @@ -265,7 +265,6 @@ class NetworkStatusDocumentV2(NetworkStatusDocument): """
def __init__(self, raw_content, validate = True): - raw_content = stem.util.str_tools._to_unicode(raw_content) super(NetworkStatusDocumentV2, self).__init__(raw_content)
self.version = None @@ -289,8 +288,8 @@ class NetworkStatusDocumentV2(NetworkStatusDocument): # that header/footer attributes aren't in the wrong section. This is a # deprecated descriptor type - patches welcome if you want those checks.
- document_file = StringIO.StringIO(raw_content) - document_content = "".join(stem.descriptor._read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file)) + document_file = io.BytesIO(raw_content) + document_content = bytes.join(b"", stem.descriptor._read_until_keywords((ROUTERS_START, V2_FOOTER_START), document_file))
self.routers = tuple(stem.descriptor.router_status_entry._parse_file( document_file, @@ -301,7 +300,8 @@ class NetworkStatusDocumentV2(NetworkStatusDocument): extra_args = (self,), ))
- document_content += "\n" + document_file.read() + document_content += b"\n" + document_file.read() + document_content = stem.util.str_tools._to_unicode(document_content)
entries = stem.descriptor._get_descriptor_components(document_content, validate)
@@ -464,9 +464,8 @@ class NetworkStatusDocumentV3(NetworkStatusDocument): :raises: **ValueError** if the document is invalid """
- raw_content = stem.util.str_tools._to_unicode(raw_content) super(NetworkStatusDocumentV3, self).__init__(raw_content) - document_file = StringIO.StringIO(raw_content) + document_file = io.BytesIO(raw_content)
self._header = _DocumentHeader(document_file, validate, default_params)
@@ -561,7 +560,8 @@ class _DocumentHeader(object):
self._unrecognized_lines = []
- content = "".join(stem.descriptor._read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)) + content = bytes.join(b"", stem.descriptor._read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)) + content = stem.util.str_tools._to_unicode(content) entries = stem.descriptor._get_descriptor_components(content, validate) self._parse(entries, validate)
@@ -791,7 +791,7 @@ class _DocumentFooter(object): self.bandwidth_weights = {} self._unrecognized_lines = []
- content = document_file.read() + content = stem.util.str_tools._to_unicode(document_file.read())
if not content: return # footer is optional and there's nothing to parse @@ -1006,6 +1006,7 @@ class DirectoryAuthority(stem.descriptor.Descriptor): """
super(DirectoryAuthority, self).__init__(raw_content) + raw_content = stem.util.str_tools._to_unicode(raw_content)
self.nickname = None self.fingerprint = None @@ -1201,6 +1202,7 @@ class KeyCertificate(stem.descriptor.Descriptor):
def __init__(self, raw_content, validate = True): super(KeyCertificate, self).__init__(raw_content) + raw_content = stem.util.str_tools._to_unicode(raw_content)
self.version = None self.address = None @@ -1415,9 +1417,8 @@ class BridgeNetworkStatusDocument(NetworkStatusDocument): self.routers = None self.published = None
- document_file = StringIO.StringIO(raw_content) - - published_line = document_file.readline() + document_file = io.BytesIO(raw_content) + published_line = stem.util.str_tools._to_unicode(document_file.readline())
if published_line.startswith("published "): published_line = published_line.split(" ", 1)[1].strip() @@ -1428,7 +1429,7 @@ class BridgeNetworkStatusDocument(NetworkStatusDocument): if validate: raise ValueError("Bridge network status document's 'published' time wasn't parsable: %s" % published_line) elif validate: - raise ValueError("Bridge network status documents must start with a 'published' line:\n%s" % raw_content) + raise ValueError("Bridge network status documents must start with a 'published' line:\n%s" % stem.util.str_tools._to_unicode(raw_content))
self.routers = tuple(stem.descriptor.router_status_entry._parse_file( document_file, diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py index e2579eb..63cf24b 100644 --- a/stem/descriptor/router_status_entry.py +++ b/stem/descriptor/router_status_entry.py @@ -64,7 +64,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start # check if we're starting at the end of the section (ie, there's no entries to read) if section_end_keywords: first_keyword = None - line_match = stem.descriptor.KEYWORD_LINE.match(document_file.readline()) + line_match = stem.descriptor.KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
if line_match: first_keyword = line_match.groups()[0] @@ -83,7 +83,7 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = "r", start include_ending_keyword = True )
- desc_content = "".join(desc_lines) + desc_content = bytes.join(b"", desc_lines)
if desc_content: yield entry_class(desc_content, validate, *extra_args) @@ -129,6 +129,7 @@ class RouterStatusEntry(stem.descriptor.Descriptor): """
super(RouterStatusEntry, self).__init__(content) + content = stem.util.str_tools._to_unicode(content)
self.document = document
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py index 103ba8b..a02eb28 100644 --- a/stem/descriptor/server_descriptor.py +++ b/stem/descriptor/server_descriptor.py @@ -127,9 +127,9 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True):
if descriptor_content: # strip newlines from annotations - annotations = map(unicode.strip, annotations) + annotations = map(bytes.strip, annotations)
- descriptor_text = "".join(descriptor_content) + descriptor_text = bytes.join(b"", descriptor_content)
if is_bridge: yield BridgeDescriptor(descriptor_text, validate, annotations) @@ -209,6 +209,7 @@ class ServerDescriptor(stem.descriptor.Descriptor): """
super(ServerDescriptor, self).__init__(raw_contents) + raw_contents = stem.util.str_tools._to_unicode(raw_contents)
self.nickname = None self.fingerprint = None @@ -302,8 +303,8 @@ class ServerDescriptor(stem.descriptor.Descriptor): annotation_dict = {}
for line in self._annotation_lines: - if " " in line: - key, value = line.split(" ", 1) + if b" " in line: + key, value = line.split(b" ", 1) annotation_dict[key] = value else: annotation_dict[line] = None @@ -652,9 +653,9 @@ class RelayDescriptor(ServerDescriptor): # Digest is calculated from everything in the # descriptor except the router-signature.
- raw_descriptor = str(self) - start_token = "router " - sig_token = "\nrouter-signature\n" + raw_descriptor = self.get_bytes() + start_token = b"router " + sig_token = b"\nrouter-signature\n" start = raw_descriptor.find(start_token) sig_start = raw_descriptor.find(sig_token) end = sig_start + len(sig_token) diff --git a/stem/response/events.py b/stem/response/events.py index d99b291..4c44e48 100644 --- a/stem/response/events.py +++ b/stem/response/events.py @@ -3,8 +3,8 @@ # See LICENSE for licensing information
import datetime +import io import re -import StringIO import time
import stem @@ -597,7 +597,7 @@ class NetworkStatusEvent(Event): content = str(self).lstrip("NS\n").rstrip("\nOK")
self.desc = list(stem.descriptor.router_status_entry._parse_file( - StringIO.StringIO(content), + io.BytesIO(str_tools._to_bytes(content)), True, entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV3, )) @@ -622,7 +622,7 @@ class NewConsensusEvent(Event): content = str(self).lstrip("NEWCONSENSUS\n").rstrip("\nOK")
self.desc = list(stem.descriptor.router_status_entry._parse_file( - StringIO.StringIO(content), + io.BytesIO(str_tools._to_bytes(content)), True, entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV3, )) diff --git a/test/integ/descriptor/microdescriptor.py b/test/integ/descriptor/microdescriptor.py index 329b5eb..da9cedd 100644 --- a/test/integ/descriptor/microdescriptor.py +++ b/test/integ/descriptor/microdescriptor.py @@ -78,8 +78,8 @@ class TestMicrodescriptor(unittest.TestCase): self.assertEquals([], router.or_addresses) self.assertEquals([], router.family) self.assertEquals(stem.exit_policy.MicroExitPolicy("reject 1-65535"), router.exit_policy) - self.assertEquals({"@last-listed": "2013-02-24 00:18:36"}, router.get_annotations()) - self.assertEquals(["@last-listed 2013-02-24 00:18:36"], router.get_annotation_lines()) + self.assertEquals({b"@last-listed": b"2013-02-24 00:18:36"}, router.get_annotations()) + self.assertEquals([b"@last-listed 2013-02-24 00:18:36"], router.get_annotation_lines())
router = next(descriptors) self.assertEquals(SECOND_ONION_KEY, router.onion_key) @@ -87,8 +87,8 @@ class TestMicrodescriptor(unittest.TestCase): self.assertEquals([], router.or_addresses) self.assertEquals(["$6141629FA0D15A6AEAEF3A1BEB76E64C767B3174"], router.family) self.assertEquals(stem.exit_policy.MicroExitPolicy("reject 1-65535"), router.exit_policy) - self.assertEquals({"@last-listed": "2013-02-24 00:18:37"}, router.get_annotations()) - self.assertEquals(["@last-listed 2013-02-24 00:18:37"], router.get_annotation_lines()) + self.assertEquals({b"@last-listed": b"2013-02-24 00:18:37"}, router.get_annotations()) + self.assertEquals([b"@last-listed 2013-02-24 00:18:37"], router.get_annotation_lines())
router = next(descriptors) self.assertEquals(THIRD_ONION_KEY, router.onion_key) @@ -96,5 +96,5 @@ class TestMicrodescriptor(unittest.TestCase): self.assertEquals([(u"2001:6b0:7:125::242", 9001, True)], router.or_addresses) self.assertEquals([], router.family) self.assertEquals(stem.exit_policy.MicroExitPolicy("accept 80,443"), router.exit_policy) - self.assertEquals({"@last-listed": "2013-02-24 00:18:36"}, router.get_annotations()) - self.assertEquals(["@last-listed 2013-02-24 00:18:36"], router.get_annotation_lines()) + self.assertEquals({b"@last-listed": b"2013-02-24 00:18:36"}, router.get_annotations()) + self.assertEquals([b"@last-listed 2013-02-24 00:18:36"], router.get_annotation_lines()) diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py index e3745c0..9217a57 100644 --- a/test/integ/descriptor/server_descriptor.py +++ b/test/integ/descriptor/server_descriptor.py @@ -228,7 +228,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4= self.assertEquals([], desc.get_unrecognized_lines())
# Make sure that we can get a string representation for this descriptor - # (having unicode content risks a UnicodeEncodeError)... + # (having non-unicode content risks a UnicodeEncodeError)... # # https://trac.torproject.org/8265
diff --git a/test/mocking.py b/test/mocking.py index beaa354..b491010 100644 --- a/test/mocking.py +++ b/test/mocking.py @@ -58,6 +58,7 @@ import base64 import hashlib import inspect import itertools +import StringIO
import stem.descriptor.extrainfo_descriptor import stem.descriptor.microdescriptor @@ -66,6 +67,7 @@ import stem.descriptor.router_status_entry import stem.descriptor.server_descriptor import stem.prereq import stem.response +import stem.util.str_tools
# Once we've mocked a function we can't rely on its __module__ or __name__ # attributes, so instead we associate a unique 'mock_id' attribute that maps @@ -640,7 +642,7 @@ def _get_descriptor_content(attr = None, exclude = (), header_template = (), foo else: remainder.append(k)
- return "\n".join(header_content + remainder + footer_content) + return stem.util.str_tools._to_bytes("\n".join(header_content + remainder + footer_content))
def get_relay_server_descriptor(attr = None, exclude = (), content = False, sign_content = False): @@ -831,7 +833,7 @@ def get_directory_authority(attr = None, exclude = (), is_vote = False, content desc_content = _get_descriptor_content(attr, exclude, AUTHORITY_HEADER)
if is_vote: - desc_content += "\n" + str(get_key_certificate()) + desc_content += b"\n" + get_key_certificate(content = True)
if content: return desc_content @@ -917,31 +919,31 @@ def get_network_status_document_v3(attr = None, exclude = (), authorities = None
# inject the authorities and/or routers between the header and footer if authorities: - if "directory-footer" in desc_content: - footer_div = desc_content.find("\ndirectory-footer") + 1 - elif "directory-signature" in desc_content: - footer_div = desc_content.find("\ndirectory-signature") + 1 + if b"directory-footer" in desc_content: + footer_div = desc_content.find(b"\ndirectory-footer") + 1 + elif b"directory-signature" in desc_content: + footer_div = desc_content.find(b"\ndirectory-signature") + 1 else: if routers: - desc_content += "\n" + desc_content += b"\n"
footer_div = len(desc_content) + 1
- authority_content = "\n".join([str(a) for a in authorities]) + "\n" + authority_content = stem.util.str_tools._to_bytes("\n".join([str(a) for a in authorities]) + "\n") desc_content = desc_content[:footer_div] + authority_content + desc_content[footer_div:]
if routers: - if "directory-footer" in desc_content: - footer_div = desc_content.find("\ndirectory-footer") + 1 - elif "directory-signature" in desc_content: - footer_div = desc_content.find("\ndirectory-signature") + 1 + if b"directory-footer" in desc_content: + footer_div = desc_content.find(b"\ndirectory-footer") + 1 + elif b"directory-signature" in desc_content: + footer_div = desc_content.find(b"\ndirectory-signature") + 1 else: if routers: - desc_content += "\n" + desc_content += b"\n"
footer_div = len(desc_content) + 1
- router_content = "\n".join([str(r) for r in routers]) + "\n" + router_content = stem.util.str_tools._to_bytes("\n".join([str(r) for r in routers]) + "\n") desc_content = desc_content[:footer_div] + router_content + desc_content[footer_div:]
if content: @@ -1057,3 +1059,60 @@ def sign_descriptor_content(desc_content): desc_content = desc_content[:rst_start] + router_signature_token + router_signature_start + signature_base64 + router_signature_end
return desc_content + + +class BytesBuffer(object): + """ + Similiar to a StringIO but provides bytes content (in python 3.x StringIO can + only be used for unicode). + """ + + def __init__(self, content): + self.wrapped_file = StringIO.StringIO(stem.util.str_tools._to_unicode(content)) + + def close(self): + return self.wrapped_file.close() + + def getvalue(self): + return self.wrapped_file.getvalue() + + def isatty(self): + return self.wrapped_file.isatty() + + def next(self): + return self.wrapped_file.next() + + def read(self, n = -1): + return stem.util.str_tools._to_bytes(self.wrapped_file.read(n)) + + def readline(self): + return stem.util.str_tools._to_bytes(self.wrapped_file.readline()) + + def readlines(self, sizehint = None): + # being careful to do in-place conversion so we don't accidently double our + # memory usage + + if sizehint is not None: + results = self.wrapped_file.readlines(sizehint) + else: + results = self.wrapped_file.readlines() + + for i in xrange(len(results)): + results[i] = stem.util.str_tools._to_bytes(results[i]) + + return results + + def seek(self, pos, mode = None): + if mode is not None: + return self.wrapped_file.seek(pos, mode) + else: + return self.wrapped_file.seek(pos) + + def tell(self): + return self.wrapped_file.tell() + + def __enter__(self): + return self + + def __exit__(self, exit_type, value, traceback): + pass diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py index 6a2dd34..957c861 100644 --- a/test/unit/descriptor/extrainfo_descriptor.py +++ b/test/unit/descriptor/extrainfo_descriptor.py @@ -35,7 +35,7 @@ class TestExtraInfoDescriptor(unittest.TestCase): Includes a line prior to the 'extra-info' entry. """
- desc_text = "exit-streams-opened port=80\n" + get_relay_extrainfo_descriptor(content = True) + desc_text = b"exit-streams-opened port=80\n" + get_relay_extrainfo_descriptor(content = True) self._expect_invalid_attr(desc_text)
def test_trailing_line(self): @@ -43,7 +43,7 @@ class TestExtraInfoDescriptor(unittest.TestCase): Includes a line after the 'router-signature' entry. """
- desc_text = get_relay_extrainfo_descriptor(content = True) + "\nexit-streams-opened port=80" + desc_text = get_relay_extrainfo_descriptor(content = True) + b"\nexit-streams-opened port=80" self._expect_invalid_attr(desc_text)
def test_extrainfo_line_missing_fields(self): diff --git a/test/unit/descriptor/microdescriptor.py b/test/unit/descriptor/microdescriptor.py index 2a2dd35..2644ffe 100644 --- a/test/unit/descriptor/microdescriptor.py +++ b/test/unit/descriptor/microdescriptor.py @@ -41,7 +41,7 @@ class TestMicrodescriptor(unittest.TestCase): Includes a line prior to the 'onion-key' entry. """
- desc_text = "family Amunet1\n" + get_microdescriptor(content = True) + desc_text = b"family Amunet1\n" + get_microdescriptor(content = True) self.assertRaises(ValueError, Microdescriptor, desc_text)
desc = Microdescriptor(desc_text, validate = False) @@ -53,8 +53,8 @@ class TestMicrodescriptor(unittest.TestCase): """
desc_text = get_microdescriptor(content = True) - desc_text += "\na 10.45.227.253:9001" - desc_text += "\na [fd9f:2e19:3bcf::02:9970]:9001" + desc_text += b"\na 10.45.227.253:9001" + desc_text += b"\na [fd9f:2e19:3bcf::02:9970]:9001"
expected = [ ("10.45.227.253", 9001, False), @@ -75,8 +75,8 @@ class TestMicrodescriptor(unittest.TestCase): # try multiple family lines
desc_text = get_microdescriptor(content = True) - desc_text += "\nfamily Amunet1" - desc_text += "\nfamily Amunet2" + desc_text += b"\nfamily Amunet1" + desc_text += b"\nfamily Amunet2"
self.assertRaises(ValueError, Microdescriptor, desc_text)
diff --git a/test/unit/descriptor/networkstatus/bridge_document.py b/test/unit/descriptor/networkstatus/bridge_document.py index 322b5a9..97aeb0c 100644 --- a/test/unit/descriptor/networkstatus/bridge_document.py +++ b/test/unit/descriptor/networkstatus/bridge_document.py @@ -7,7 +7,7 @@ import unittest
from stem.descriptor.networkstatus import BridgeNetworkStatusDocument
-DOCUMENT = """\ +DOCUMENT = b"""\ published 2012-06-01 04:07:04 r Unnamed ABSiBVJ42z6w5Z6nAXQUFq8YVVg FI74aFuNJZZQrgln0f+OaocMd0M 2012-05-31 15:57:00 10.97.236.247 443 0 s Valid @@ -26,7 +26,7 @@ class TestBridgeNetworkStatusDocument(unittest.TestCase): Parse a document without any router status entries. """
- document = BridgeNetworkStatusDocument("published 2012-06-01 04:07:04") + document = BridgeNetworkStatusDocument(b"published 2012-06-01 04:07:04") self.assertEqual(datetime.datetime(2012, 6, 1, 4, 7, 4), document.published) self.assertEqual((), document.routers) self.assertEqual([], document.get_unrecognized_lines()) diff --git a/test/unit/descriptor/networkstatus/directory_authority.py b/test/unit/descriptor/networkstatus/directory_authority.py index 539b6b1..d8a818a 100644 --- a/test/unit/descriptor/networkstatus/directory_authority.py +++ b/test/unit/descriptor/networkstatus/directory_authority.py @@ -83,7 +83,7 @@ class TestDirectoryAuthority(unittest.TestCase): Includes a non-mandatory field before the 'dir-source' line. """
- content = "ho-hum 567\n" + get_directory_authority(content = True) + content = b"ho-hum 567\n" + get_directory_authority(content = True) self.assertRaises(ValueError, DirectoryAuthority, content)
authority = DirectoryAuthority(content, False) @@ -119,10 +119,10 @@ class TestDirectoryAuthority(unittest.TestCase): Duplicates linesin the entry. """
- lines = get_directory_authority(content = True).split("\n") + lines = get_directory_authority(content = True).split(b"\n")
for index, duplicate_line in enumerate(lines): - content = "\n".join(lines[:index] + [duplicate_line] + lines[index:]) + content = b"\n".join(lines[:index] + [duplicate_line] + lines[index:]) self.assertRaises(ValueError, DirectoryAuthority, content)
authority = DirectoryAuthority(content, False) @@ -254,17 +254,17 @@ class TestDirectoryAuthority(unittest.TestCase): Includes or exclude a key certificate from the directory entry. """
- key_cert = get_key_certificate() + key_cert = get_key_certificate(content = True)
# include a key cert with a consensus - content = get_directory_authority(content = True) + "\n" + str(key_cert) + content = get_directory_authority(content = True) + b"\n" + key_cert self.assertRaises(ValueError, DirectoryAuthority, content)
authority = DirectoryAuthority(content, False) self.assertEqual('turtles', authority.nickname)
# exclude key cert from a vote - content = get_directory_authority(content = True, is_vote = True).replace("\n" + str(key_cert), '') + content = get_directory_authority(content = True, is_vote = True).replace(b"\n" + key_cert, b'') self.assertRaises(ValueError, DirectoryAuthority, content, True, True)
authority = DirectoryAuthority(content, False, True) diff --git a/test/unit/descriptor/networkstatus/document_v3.py b/test/unit/descriptor/networkstatus/document_v3.py index 1a478cf..c140d23 100644 --- a/test/unit/descriptor/networkstatus/document_v3.py +++ b/test/unit/descriptor/networkstatus/document_v3.py @@ -5,7 +5,6 @@ Unit tests for the NetworkStatusDocumentV3 of stem.descriptor.networkstatus. from __future__ import with_statement
import datetime -import StringIO import unittest
import stem.descriptor @@ -25,11 +24,11 @@ from stem.descriptor.router_status_entry import \ RouterStatusEntryV3, \ RouterStatusEntryMicroV3
-from test.mocking import support_with, \ - get_router_status_entry_v3, \ +from test.mocking import get_router_status_entry_v3, \ get_router_status_entry_micro_v3, \ get_directory_authority, \ get_network_status_document_v3, \ + BytesBuffer, \ CRYPTO_BLOB, \ DOC_SIG, \ NETWORK_STATUS_DOCUMENT_FOOTER @@ -119,7 +118,7 @@ class TestNetworkStatusDocument(unittest.TestCase):
# first example: parsing via the NetworkStatusDocumentV3 constructor
- consensus_file = StringIO.StringIO(content) + consensus_file = BytesBuffer(content) consensus = NetworkStatusDocumentV3(consensus_file.read()) consensus_file.close()
@@ -128,7 +127,7 @@ class TestNetworkStatusDocument(unittest.TestCase):
# second example: using stem.descriptor.parse_file
- with support_with(StringIO.StringIO(content)) as consensus_file: + with BytesBuffer(content) as consensus_file: for router in stem.descriptor.parse_file(consensus_file, 'network-status-consensus-3 1.0'): self.assertEqual('caerSidi', router.nickname)
@@ -145,12 +144,12 @@ class TestNetworkStatusDocument(unittest.TestCase): entry2 = get_router_status_entry_v3({'s': "Valid"}) content = get_network_status_document_v3(routers = (entry1, entry2), content = True)
- descriptors = list(stem.descriptor.parse_file(StringIO.StringIO(content), 'network-status-consensus-3 1.0', document_handler = stem.descriptor.DocumentHandler.DOCUMENT)) + descriptors = list(stem.descriptor.parse_file(BytesBuffer(content), 'network-status-consensus-3 1.0', document_handler = stem.descriptor.DocumentHandler.DOCUMENT)) self.assertEqual(1, len(descriptors)) self.assertTrue(isinstance(descriptors[0], NetworkStatusDocumentV3)) self.assertEqual(2, len(descriptors[0].routers))
- descriptors = list(stem.descriptor.parse_file(StringIO.StringIO(content), 'network-status-consensus-3 1.0', document_handler = stem.descriptor.DocumentHandler.BARE_DOCUMENT)) + descriptors = list(stem.descriptor.parse_file(BytesBuffer(content), 'network-status-consensus-3 1.0', document_handler = stem.descriptor.DocumentHandler.BARE_DOCUMENT)) self.assertEqual(1, len(descriptors)) self.assertTrue(isinstance(descriptors[0], NetworkStatusDocumentV3)) self.assertEqual(0, len(descriptors[0].routers)) @@ -169,7 +168,7 @@ class TestNetworkStatusDocument(unittest.TestCase):
expected_document = get_network_status_document_v3()
- descriptor_file = StringIO.StringIO(content) + descriptor_file = BytesBuffer(content) entries = list(_parse_file(descriptor_file))
self.assertEquals(entry1, entries[0]) @@ -207,18 +206,18 @@ class TestNetworkStatusDocument(unittest.TestCase):
for is_consensus in (True, False): attr = {"vote-status": "consensus"} if is_consensus else {"vote-status": "vote"} - lines = get_network_status_document_v3(attr, content = True).split("\n") + lines = get_network_status_document_v3(attr, content = True).split(b"\n")
for index in xrange(len(lines) - 1): # once we reach the crypto blob we're done since swapping those won't # be detected - if lines[index].startswith(CRYPTO_BLOB[1:10]): break + if lines[index].startswith(stem.util.str_tools._to_bytes(CRYPTO_BLOB[1:10])): break
# swaps this line with the one after it test_lines = list(lines) test_lines[index], test_lines[index + 1] = test_lines[index + 1], test_lines[index]
- content = "\n".join(test_lines) + content = b"\n".join(test_lines) self.assertRaises(ValueError, NetworkStatusDocumentV3, content) NetworkStatusDocumentV3(content, False) # constructs without validation
@@ -230,7 +229,7 @@ class TestNetworkStatusDocument(unittest.TestCase):
for is_consensus in (True, False): attr = {"vote-status": "consensus"} if is_consensus else {"vote-status": "vote"} - lines = get_network_status_document_v3(attr, content = True).split("\n") + lines = get_network_status_document_v3(attr, content = True).split(b"\n")
for index, line in enumerate(lines): # Stop when we hit the 'directory-signature' for a couple reasons... @@ -239,13 +238,13 @@ class TestNetworkStatusDocument(unittest.TestCase): # validation failure
test_lines = list(lines) - if line.startswith("directory-signature "): + if line.startswith(b"directory-signature "): break
# duplicates the line test_lines.insert(index, line)
- content = "\n".join(test_lines) + content = b"\n".join(test_lines) self.assertRaises(ValueError, NetworkStatusDocumentV3, content) NetworkStatusDocumentV3(content, False) # constructs without validation
@@ -902,10 +901,11 @@ class TestNetworkStatusDocument(unittest.TestCase):
# make the dir-key-published field of the certiciate be malformed authority_content = get_directory_authority(is_vote = True, content = True) - authority_content = authority_content.replace("dir-key-published 2011", "dir-key-published 2011a") + authority_content = authority_content.replace(b"dir-key-published 2011", b"dir-key-published 2011a") + authority = DirectoryAuthority(authority_content, False, True)
- content = get_network_status_document_v3({"vote-status": "vote"}, authorities = (authority_content,), content = True) + content = get_network_status_document_v3({"vote-status": "vote"}, authorities = (authority,), content = True) self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
document = NetworkStatusDocumentV3(content, validate = False) - self.assertEquals((DirectoryAuthority(authority_content, False, True),), document.directory_authorities) + self.assertEquals((authority,), document.directory_authorities) diff --git a/test/unit/descriptor/networkstatus/key_certificate.py b/test/unit/descriptor/networkstatus/key_certificate.py index 19ef090..f94bf0c 100644 --- a/test/unit/descriptor/networkstatus/key_certificate.py +++ b/test/unit/descriptor/networkstatus/key_certificate.py @@ -49,8 +49,8 @@ class TestKeyCertificate(unittest.TestCase):
content = get_key_certificate(content = True)
- for cert_text in ("dir-address 127.0.0.1:80\n" + content, - content + "\ndir-address 127.0.0.1:80"): + for cert_text in (b"dir-address 127.0.0.1:80\n" + content, + content + b"\ndir-address 127.0.0.1:80"): self.assertRaises(ValueError, KeyCertificate, cert_text)
certificate = KeyCertificate(cert_text, False) diff --git a/test/unit/descriptor/router_status_entry.py b/test/unit/descriptor/router_status_entry.py index a97153a..e236a75 100644 --- a/test/unit/descriptor/router_status_entry.py +++ b/test/unit/descriptor/router_status_entry.py @@ -135,7 +135,7 @@ class TestRouterStatusEntry(unittest.TestCase): Includes content prior to the 'r' line. """
- content = 'z some stuff\n' + get_router_status_entry_v3(content = True) + content = b'z some stuff\n' + get_router_status_entry_v3(content = True) self._expect_invalid_attr(content, "_unrecognized_lines", ['z some stuff'])
def test_blank_lines(self): @@ -143,7 +143,7 @@ class TestRouterStatusEntry(unittest.TestCase): Includes blank lines, which should be ignored. """
- content = get_router_status_entry_v3(content = True) + "\n\nv Tor 0.2.2.35\n\n" + content = get_router_status_entry_v3(content = True) + b"\n\nv Tor 0.2.2.35\n\n" entry = RouterStatusEntryV3(content) self.assertEqual("Tor 0.2.2.35", entry.version_line)
@@ -152,10 +152,10 @@ class TestRouterStatusEntry(unittest.TestCase): Duplicates linesin the entry. """
- lines = get_router_status_entry_v3(content = True).split("\n") + lines = get_router_status_entry_v3(content = True).split(b"\n")
for index, duplicate_line in enumerate(lines): - content = "\n".join(lines[:index] + [duplicate_line] + lines[index:]) + content = b"\n".join(lines[:index] + [duplicate_line] + lines[index:]) self.assertRaises(ValueError, RouterStatusEntryV3, content)
entry = RouterStatusEntryV3(content, False) @@ -324,8 +324,8 @@ class TestRouterStatusEntry(unittest.TestCase): # includes multiple 'a' lines
content = get_router_status_entry_v3(content = True) - content += "\na [2607:fcd0:daaa:101::602c:bd62]:443" - content += "\na [1148:fcd0:daaa:101::602c:bd62]:80" + content += b"\na [2607:fcd0:daaa:101::602c:bd62]:443" + content += b"\na [1148:fcd0:daaa:101::602c:bd62]:80"
expected = [ ('2607:fcd0:daaa:101::602c:bd62', 443, True), @@ -490,8 +490,8 @@ class TestRouterStatusEntry(unittest.TestCase): # try with multiple 'm' lines
content = get_router_status_entry_v3(content = True) - content += "\nm 11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs" - content += "\nm 31,32 sha512=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs" + content += b"\nm 11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs" + content += b"\nm 31,32 sha512=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs"
expected = [ ([11, 12], {"sha256": "g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs"}), diff --git a/test/unit/descriptor/server_descriptor.py b/test/unit/descriptor/server_descriptor.py index e62e614..7e7828d 100644 --- a/test/unit/descriptor/server_descriptor.py +++ b/test/unit/descriptor/server_descriptor.py @@ -3,7 +3,6 @@ Unit tests for stem.descriptor.server_descriptor. """
import datetime -import StringIO import unittest
import stem.descriptor.server_descriptor @@ -18,6 +17,7 @@ from test.mocking import no_op, \ revert_mocking, \ get_relay_server_descriptor, \ get_bridge_server_descriptor, \ + BytesBuffer, \ CRYPTO_BLOB
@@ -62,7 +62,7 @@ class TestServerDescriptor(unittest.TestCase): Includes a line prior to the 'router' entry. """
- desc_text = "hibernate 1\n" + get_relay_server_descriptor(content = True) + desc_text = b"hibernate 1\n" + get_relay_server_descriptor(content = True) self._expect_invalid_attr(desc_text)
def test_trailing_line(self): @@ -70,7 +70,7 @@ class TestServerDescriptor(unittest.TestCase): Includes a line after the 'router-signature' entry. """
- desc_text = get_relay_server_descriptor(content = True) + "\nhibernate 1" + desc_text = get_relay_server_descriptor(content = True) + b"\nhibernate 1" self._expect_invalid_attr(desc_text)
def test_nickname_missing(self): @@ -139,7 +139,7 @@ class TestServerDescriptor(unittest.TestCase): self.assertEquals("", desc.platform)
# does the same but with 'platform ' replaced with 'platform' - desc_text = desc_text.replace("platform ", "platform") + desc_text = desc_text.replace(b"platform ", b"platform") desc = RelayDescriptor(desc_text, validate = False) self.assertEquals("", desc.platform)
@@ -211,20 +211,20 @@ class TestServerDescriptor(unittest.TestCase): Checks that content before a descriptor are parsed as annotations. """
- desc_text = "@pepperjack very tasty\n@mushrooms not so much\n" + desc_text = b"@pepperjack very tasty\n@mushrooms not so much\n" desc_text += get_relay_server_descriptor(content = True) - desc_text += "\ntrailing text that should be ignored, ho hum" + desc_text += b"\ntrailing text that should be ignored, ho hum"
# running _parse_file should provide an iterator with a single descriptor - desc_iter = stem.descriptor.server_descriptor._parse_file(StringIO.StringIO(stem.util.str_tools._to_unicode(desc_text))) + desc_iter = stem.descriptor.server_descriptor._parse_file(BytesBuffer(stem.util.str_tools._to_unicode(desc_text))) desc_entries = list(desc_iter) self.assertEquals(1, len(desc_entries)) desc = desc_entries[0]
self.assertEquals("caerSidi", desc.nickname) - self.assertEquals("@pepperjack very tasty", desc.get_annotation_lines()[0]) - self.assertEquals("@mushrooms not so much", desc.get_annotation_lines()[1]) - self.assertEquals({"@pepperjack": "very tasty", "@mushrooms": "not so much"}, desc.get_annotations()) + self.assertEquals(b"@pepperjack very tasty", desc.get_annotation_lines()[0]) + self.assertEquals(b"@mushrooms not so much", desc.get_annotation_lines()[1]) + self.assertEquals({b"@pepperjack": b"very tasty", b"@mushrooms": b"not so much"}, desc.get_annotations()) self.assertEquals([], desc.get_unrecognized_lines())
def test_duplicate_field(self): @@ -233,7 +233,7 @@ class TestServerDescriptor(unittest.TestCase): """
desc_text = get_relay_server_descriptor({"<replace>": ""}, content = True) - desc_text = desc_text.replace("<replace>", "contact foo\ncontact bar") + desc_text = desc_text.replace(b"<replace>", b"contact foo\ncontact bar") self._expect_invalid_attr(desc_text, "contact", "foo")
def test_missing_required_attr(self): @@ -386,9 +386,9 @@ class TestServerDescriptor(unittest.TestCase): Constructs a bridge descriptor with multiple or-address entries and multiple ports. """
- desc_text = "\n".join((get_bridge_server_descriptor(content = True), - "or-address 10.45.227.253:9001", - "or-address [fd9f:2e19:3bcf::02:9970]:443")) + desc_text = b"\n".join((get_bridge_server_descriptor(content = True), + b"or-address 10.45.227.253:9001", + b"or-address [fd9f:2e19:3bcf::02:9970]:443"))
expected_or_addresses = [ ("10.45.227.253", 9001, False), diff --git a/test/unit/tutorial.py b/test/unit/tutorial.py index c421cd7..4ec6290 100644 --- a/test/unit/tutorial.py +++ b/test/unit/tutorial.py @@ -98,7 +98,7 @@ class TestTutorial(unittest.TestCase): for desc in parse_file(open("/home/atagar/.tor/cached-consensus")): print "found relay %s (%s)" % (desc.nickname, desc.fingerprint)
- test_file = StringIO.StringIO(mocking.get_network_status_document_v3( + test_file = mocking.BytesBuffer(mocking.get_network_status_document_v3( routers = [mocking.get_router_status_entry_v3()], content = True, )) @@ -165,7 +165,7 @@ class TestTutorial(unittest.TestCase):
exit_descriptor = mocking.get_relay_server_descriptor({ 'router': 'speedyexit 149.255.97.109 9001 0 0' - }, content = True).replace('reject *:*', 'accept *:*') + }, content = True).replace(b'reject *:*', b'accept *:*')
exit_descriptor = mocking.sign_descriptor_content(exit_descriptor) exit_descriptor = RelayDescriptor(exit_descriptor)