commit 0288267be5a325aadd078a487751cf4ce83a2f73 Author: Damian Johnson atagar@torproject.org Date: Wed Jan 30 08:24:59 2013 -0800
Skip universal newline translation in descriptor reader
Python 3 introduces universal newline translation, converting '\n', '\r', and '\r\n' into the local system's newline style. This is a really neat feature and will solve many a headache... but not for us. We conform to the tor spec, which specifies when CRLF appears versus other newline types.
Universal newline translation broke our ability to read the 'cr_in_contact_line' example which has multiple '\r' within a contact line (https://trac.torproject.org/5637). Fixing the reader to disable newline translation and adding a warning to our parse_file() pydocs. --- stem/descriptor/__init__.py | 10 ++++++++++ stem/descriptor/reader.py | 8 +++++++- 2 files changed, 17 insertions(+), 1 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index fe6c03c..69d7db7 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -82,6 +82,16 @@ def parse_file(descriptor_file, descriptor_type = None, path = None, validate = tordnsel 1.0 **unsupported** ===================================== =====
+ If you're using python 3 then beware of the open() function's universal + newline translation. By default open() converts all common line endings (NL, + CR, and CRNL) into NL. In some edge cases this can cause us to misparse + content. To disable newline translation set the **newline** to an empty + string. For example... + + :: + + my_descriptor_file = open(descrptor_path, newline='') + :param file descriptor_file: opened file with the descriptor contents :param str descriptor_type: `descriptor type https://metrics.torproject.org/formats.html#descriptortypes`_, this is guessed if not provided :param str path: absolute path to the file's location on disk diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py index 877d230..ef35fc0 100644 --- a/stem/descriptor/reader.py +++ b/stem/descriptor/reader.py @@ -512,7 +512,13 @@ class DescriptorReader(object): def _handle_descriptor_file(self, target, mime_type): try: self._notify_read_listeners(target) - with open(target) as target_file: + + if stem.prereq.is_python_3(): + target_file = open(target, newline = '') + else: + target_file = open(target) + + with target_file as target_file: for desc in stem.descriptor.parse_file(target_file, validate = self._validate, path = target): if self._is_stopped.isSet(): return