[tor-commits] [stem/master] Do unicode conversion in _get_descriptor_components()

atagar at torproject.org atagar at torproject.org
Sun Jan 25 22:37:34 UTC 2015


commit e2a55771cfc85882b0e00a071c3a8dd6ea126fd3
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Jan 25 11:15:44 2015 -0800

    Do unicode conversion in _get_descriptor_components()
    
    Every descriptor type converts its raw content from bytes => unicode
    before calling _get_descriptor_components(). Might as well just do the
    conversion once, in the helper itself.
---
 stem/descriptor/__init__.py             |    3 +++
 stem/descriptor/extrainfo_descriptor.py |    2 --
 stem/descriptor/microdescriptor.py      |    1 -
 stem/descriptor/networkstatus.py        |   21 +++++++--------------
 stem/descriptor/router_status_entry.py  |    2 --
 stem/descriptor/server_descriptor.py    |    3 +--
 6 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index a5eb87d..0d2295d 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -649,6 +649,9 @@ def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
     value tuple, the second being a list of those entries.
   """
 
+  if isinstance(raw_contents, bytes):
+    raw_contents = stem.util.str_tools._to_unicode(raw_contents)
+
   entries = OrderedDict()
   extra_entries = []  # entries with a keyword in extra_keywords
   remaining_lines = raw_contents.split('\n')
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 1eebe46..5b0339c 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -781,8 +781,6 @@ class ExtraInfoDescriptor(Descriptor):
     """
 
     super(ExtraInfoDescriptor, self).__init__(raw_contents, lazy_load = not validate)
-    raw_contents = stem.util.str_tools._to_unicode(raw_contents)
-
     entries = _get_descriptor_components(raw_contents, validate)
 
     if validate:
diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index 3d9447f..5b8d0a3 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -220,7 +220,6 @@ class Microdescriptor(Descriptor):
 
   def __init__(self, raw_contents, validate = True, annotations = None):
     super(Microdescriptor, self).__init__(raw_contents, lazy_load = not validate)
-    raw_contents = stem.util.str_tools._to_unicode(raw_contents)
 
     self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
     self._annotation_lines = annotations if annotations else []
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 2289cdf..a8ec38a 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -399,10 +399,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
 
     self.routers = dict((desc.fingerprint, desc) for desc in router_iter)
 
-    document_content += b'\n' + document_file.read()
-    document_content = stem.util.str_tools._to_unicode(document_content)
-
-    entries = _get_descriptor_components(document_content, validate)
+    entries = _get_descriptor_components(document_content + b'\n' + document_file.read(), validate)
 
     if validate:
       self._check_constraints(entries)
@@ -791,7 +788,6 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
 
   def _header(self, document_file, validate):
     content = bytes.join(b'', _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
-    content = stem.util.str_tools._to_unicode(content)
     entries = _get_descriptor_components(content, validate)
 
     if validate:
@@ -820,8 +816,7 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
       self._entries.update(entries)
 
   def _footer(self, document_file, validate):
-    content = stem.util.str_tools._to_unicode(document_file.read())
-    entries = _get_descriptor_components(content, validate) if content else {}
+    entries = _get_descriptor_components(document_file.read(), validate)
 
     if validate:
       for keyword, values in list(entries.items()):
@@ -1282,26 +1277,24 @@ class KeyCertificate(Descriptor):
 
   def __init__(self, raw_content, validate = True):
     super(KeyCertificate, self).__init__(raw_content, lazy_load = not validate)
-
-    content = stem.util.str_tools._to_unicode(raw_content)
-    entries = _get_descriptor_components(content, validate)
+    entries = _get_descriptor_components(raw_content, validate)
 
     if validate:
       if 'dir-key-certificate-version' != list(entries.keys())[0]:
-        raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (content))
+        raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (raw_content))
       elif 'dir-key-certification' != list(entries.keys())[-1]:
-        raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (content))
+        raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (raw_content))
 
       # check that we have mandatory fields and that our known fields only
       # appear once
 
       for keyword, is_mandatory in KEY_CERTIFICATE_PARAMS:
         if is_mandatory and keyword not in entries:
-          raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, content))
+          raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, raw_content))
 
         entry_count = len(entries.get(keyword, []))
         if entry_count > 1:
-          raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, content))
+          raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, raw_content))
 
       self._parse(entries, validate)
     else:
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index f80c56b..292ec7e 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -399,8 +399,6 @@ class RouterStatusEntry(Descriptor):
     """
 
     super(RouterStatusEntry, self).__init__(content, lazy_load = not validate)
-    content = stem.util.str_tools._to_unicode(content)
-
     self.document = document
     entries = _get_descriptor_components(content, validate)
 
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 5786ab9..b107b33 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -530,8 +530,7 @@ class ServerDescriptor(Descriptor):
     # influences the resulting exit policy, but for everything else the order
     # does not matter so breaking it into key / value pairs.
 
-    raw_contents = stem.util.str_tools._to_unicode(raw_contents)
-    entries, self._unparsed_exit_policy = _get_descriptor_components(raw_contents, validate, ('accept', 'reject'))
+    entries, self._unparsed_exit_policy = _get_descriptor_components(stem.util.str_tools._to_unicode(raw_contents), validate, ('accept', 'reject'))
 
     if validate:
       self._parse(entries, validate)





More information about the tor-commits mailing list