[stem/master] Do unicode conversion in _get_descriptor_components()

commit e2a55771cfc85882b0e00a071c3a8dd6ea126fd3 Author: Damian Johnson <atagar@torproject.org> Date: Sun Jan 25 11:15:44 2015 -0800 Do unicode conversion in _get_descriptor_components() Every descriptor type does the bytes => unicode conversion for it. Might as well just do this in the helper itself. --- stem/descriptor/__init__.py | 3 +++ stem/descriptor/extrainfo_descriptor.py | 2 -- stem/descriptor/microdescriptor.py | 1 - stem/descriptor/networkstatus.py | 21 +++++++-------------- stem/descriptor/router_status_entry.py | 2 -- stem/descriptor/server_descriptor.py | 3 +-- 6 files changed, 11 insertions(+), 21 deletions(-) diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index a5eb87d..0d2295d 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -649,6 +649,9 @@ def _get_descriptor_components(raw_contents, validate, extra_keywords = ()): value tuple, the second being a list of those entries. """ + if isinstance(raw_contents, bytes): + raw_contents = stem.util.str_tools._to_unicode(raw_contents) + entries = OrderedDict() extra_entries = [] # entries with a keyword in extra_keywords remaining_lines = raw_contents.split('\n') diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index 1eebe46..5b0339c 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -781,8 +781,6 @@ class ExtraInfoDescriptor(Descriptor): """ super(ExtraInfoDescriptor, self).__init__(raw_contents, lazy_load = not validate) - raw_contents = stem.util.str_tools._to_unicode(raw_contents) - entries = _get_descriptor_components(raw_contents, validate) if validate: diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py index 3d9447f..5b8d0a3 100644 --- a/stem/descriptor/microdescriptor.py +++ b/stem/descriptor/microdescriptor.py @@ -220,7 +220,6 @@ class Microdescriptor(Descriptor): def __init__(self, raw_contents, validate = True, annotations = None): super(Microdescriptor, self).__init__(raw_contents, lazy_load = not validate) - raw_contents = stem.util.str_tools._to_unicode(raw_contents) self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper() self._annotation_lines = annotations if annotations else [] diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py index 2289cdf..a8ec38a 100644 --- a/stem/descriptor/networkstatus.py +++ b/stem/descriptor/networkstatus.py @@ -399,10 +399,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument): self.routers = dict((desc.fingerprint, desc) for desc in router_iter) - document_content += b'\n' + document_file.read() - document_content = stem.util.str_tools._to_unicode(document_content) - - entries = _get_descriptor_components(document_content, validate) + entries = _get_descriptor_components(document_content + b'\n' + document_file.read(), validate) if validate: self._check_constraints(entries) @@ -791,7 +788,6 @@ class NetworkStatusDocumentV3(NetworkStatusDocument): def _header(self, document_file, validate): content = bytes.join(b'', _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)) - content = stem.util.str_tools._to_unicode(content) entries = _get_descriptor_components(content, validate) if validate: @@ -820,8 +816,7 @@ class NetworkStatusDocumentV3(NetworkStatusDocument): self._entries.update(entries) def _footer(self, document_file, validate): - content = stem.util.str_tools._to_unicode(document_file.read()) - entries = _get_descriptor_components(content, validate) if content else {} + entries = _get_descriptor_components(document_file.read(), validate) if validate: for keyword, values in list(entries.items()): @@ -1282,26 +1277,24 @@ class KeyCertificate(Descriptor): def __init__(self, raw_content, validate = True): super(KeyCertificate, self).__init__(raw_content, lazy_load = not validate) - - content = stem.util.str_tools._to_unicode(raw_content) - entries = _get_descriptor_components(content, validate) + entries = _get_descriptor_components(raw_content, validate) if validate: if 'dir-key-certificate-version' != list(entries.keys())[0]: - raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (content)) + raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (raw_content)) elif 'dir-key-certification' != list(entries.keys())[-1]: - raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (content)) + raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (raw_content)) # check that we have mandatory fields and that our known fields only # appear once for keyword, is_mandatory in KEY_CERTIFICATE_PARAMS: if is_mandatory and keyword not in entries: - raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, content)) + raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, raw_content)) entry_count = len(entries.get(keyword, [])) if entry_count > 1: - raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, content)) + raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, raw_content)) self._parse(entries, validate) else: diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py index f80c56b..292ec7e 100644 --- a/stem/descriptor/router_status_entry.py +++ b/stem/descriptor/router_status_entry.py @@ -399,8 +399,6 @@ class RouterStatusEntry(Descriptor): """ super(RouterStatusEntry, self).__init__(content, lazy_load = not validate) - content = stem.util.str_tools._to_unicode(content) - self.document = document entries = _get_descriptor_components(content, validate) diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py index 5786ab9..b107b33 100644 --- a/stem/descriptor/server_descriptor.py +++ b/stem/descriptor/server_descriptor.py @@ -530,8 +530,7 @@ class ServerDescriptor(Descriptor): # influences the resulting exit policy, but for everything else the order # does not matter so breaking it into key / value pairs. - raw_contents = stem.util.str_tools._to_unicode(raw_contents) - entries, self._unparsed_exit_policy = _get_descriptor_components(raw_contents, validate, ('accept', 'reject')) + entries, self._unparsed_exit_policy = _get_descriptor_components(stem.util.str_tools._to_unicode(raw_contents), validate, ('accept', 'reject')) if validate: self._parse(entries, validate)
participants (1)
-
atagar@torproject.org