[tor-commits] [stem/master] Add a digest DigestEncoding argument

atagar at torproject.org atagar at torproject.org
Thu Nov 15 20:29:50 UTC 2018


commit 874f41977592506834d8bcff2cd711b26402f9f0
Author: Damian Johnson <atagar at torproject.org>
Date:   Thu Nov 15 09:29:56 2018 -0800

    Add a digest DigestEncoding argument
    
    Digests are defined by a hash type and encoding tuple. I was using the first to
    imply the second, but this doesn't always work. For example, the consensus
    cites base64 encoded sha1 server descriptor digests but stem provides hex
    encoded sha1s due to the following discussion with Karsten (subject: "Stem
    Sphinx Documentation", 6/7/12).
    
      >> - Why does digest() return the base64-encoded digest, not the
      >> hex-formatted one?  Network statuses are the only documents in Tor using
      >> base64 (or rather, a variant of it without trailing ='s), so it's easier
      >> to convert those to hex than to convert everything else to base64.  Now,
      >> if you switch to hex, you'll only have to decide between lower-case and
      >> upper-case.  I think Tor and metrics-lib use upper-case hex in most places.
      >
      > I went with base64 because I thought that this was only useful for
      > comparing with the network status. What uses the hex encoded digest?
    
      The hex-encoded server descriptor digest is used as file name in metrics
      tarballs.
    
      The (decoded) descriptor digest is used to verify the descriptor signature.
    
      Another reason for hex-encoding the digest() result is that the digest()
      in extra-info descriptors should return the hex-encoded digest, too, or
      you wouldn't be able to compare it to the extra-info-digest line in
      server descriptors.  Having both methods return a different encoding
      would be confusing.
    
      Oh, and router-digest lines in sanitized bridge descriptors also contain
      the hex-encoded digest.  You wouldn't want to convert that to base64
      before writing it to the digest variable, nor would you want digest()
      and digest to return differently encoded digests.
    
    As such I'm going to leave both the hashing and encoding up to our callers
    *and* cite all digest uses I know of in our digest method's pydoc.
---
 stem/descriptor/__init__.py                  | 50 +++++++++++++++++++++++-----
 stem/descriptor/extrainfo_descriptor.py      | 41 +++++++++++++----------
 test/unit/descriptor/extrainfo_descriptor.py |  2 +-
 3 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index a35cd4e4..cf661c58 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -23,15 +23,28 @@ Package for parsing and processing descriptor data.
 
   .. versionadded:: 1.8.0
 
-  Hashing algorithm used by tor for descriptor digests. We drop trailing '='
-  hash padding to match Tor.
+  Hash function used by tor for descriptor digests.
 
-  =================== ===========
-  DigestHash          Description
-  =================== ===========
-  SHA1                SHA1 hash
-  SHA256              SHA256 hash
-  =================== ===========
+  =========== ===========
+  DigestHash  Description
+  =========== ===========
+  SHA1        SHA1 hash
+  SHA256      SHA256 hash
+  =========== ===========
+
+.. data:: DigestEncoding (enum)
+
+  .. versionadded:: 1.8.0
+
+  Encoding of descriptor digests.
+
+  ================= ===========
+  DigestEncoding    Description
+  ================= ===========
+  RAW               hash object
+  HEX               uppercase hexadecimal encoding
+  BASE64            base64 encoding `without trailing '=' padding <https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding>`_
+  ================= ===========
 
 .. data:: DocumentHandler (enum)
 
@@ -137,6 +150,12 @@ DigestHash = stem.util.enum.UppercaseEnum(
   'SHA256',
 )
 
+DigestEncoding = stem.util.enum.UppercaseEnum(
+  'RAW',
+  'HEX',
+  'BASE64',
+)
+
 DocumentHandler = stem.util.enum.UppercaseEnum(
   'ENTRIES',
   'DOCUMENT',
@@ -647,6 +666,21 @@ def _copy(default):
     return copy.copy(default)
 
 
+def _encode_digest(hash_value, encoding):
+  """
+  Encodes a hash value with the given DigestEncoding.
+  """
+
+  if encoding == DigestEncoding.RAW:
+    return hash_value
+  elif encoding == DigestEncoding.HEX:
+    return hash_value.hexdigest().upper()
+  elif encoding == DigestEncoding.BASE64:
+    return base64.b64encode(hash_value.digest()).rstrip('=')
+  else:
+    raise NotImplementedError('BUG: stem.descriptor._encode_digest should recognize all DigestEncoding, lacked %s' % encoding)
+
+
 class Descriptor(object):
   """
   Common parent for all types of descriptors.
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index a374ea03..4319bb05 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -67,7 +67,6 @@ Extra-info descriptors are available from a few sources...
   ===================== ===========
 """
 
-import base64
 import functools
 import hashlib
 import re
@@ -81,6 +80,7 @@ from stem.descriptor import (
   PGP_BLOCK_END,
   Descriptor,
   DigestHash,
+  DigestEncoding,
   create_signing_key,
   _descriptor_content,
   _read_until_keywords,
@@ -868,18 +868,27 @@ class ExtraInfoDescriptor(Descriptor):
     else:
       self._entries = entries
 
-  def digest(self, hash_type = DigestHash.SHA1):
+  def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX):
     """
-    Provides the upper-case hex encoded sha1 of our content. This value is part
-    of the server descriptor entry for this relay.
+    Digest of this descriptor's content. These are referenced by...
+
+      * **Server Descriptors**
+
+        * Referer: :class:`~stem.descriptor.server_descriptor.ServerDescriptor` **extra_info_digest** attribute
+        * Format: **SHA1/HEX**
+
+      * **Server Descriptors**
+
+        * Referer: :class:`~stem.descriptor.server_descriptor.ServerDescriptor` **extra_info_sha256_digest** attribute
+        * Format: **SHA256/BASE64**
 
     .. versionchanged:: 1.8.0
-       Added the hash_type argument.
+       Added the hash_type and encoding arguments.
 
     :param stem.descriptor.DigestHash hash_type: digest hashing algorithm
+    :param stem.descriptor.DigestEncoding encoding: digest encoding
 
-    :returns: **str** with the upper-case hex digest value for this server
-      descriptor
+    :returns: **hashlib.HASH** or **str** based on our encoding argument
     """
 
     raise NotImplementedError('Unsupported Operation: this should be implemented by the ExtraInfoDescriptor subclass')
@@ -953,24 +962,20 @@ class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
     return cls(cls.content(attr, exclude, sign, signing_key), validate = validate)
 
   @lru_cache()
-  def digest(self, hash_type = DigestHash.SHA1):
+  def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX):
     if hash_type == DigestHash.SHA1:
       # our digest is calculated from everything except our signature
 
       raw_content, ending = str(self), '\nrouter-signature\n'
       raw_content = stem.util.str_tools._to_bytes(raw_content[:raw_content.find(ending) + len(ending)])
-      return hashlib.sha1(raw_content).hexdigest().upper()
+      return stem.descriptor._encode_digest(hashlib.sha1(raw_content), encoding)
     elif hash_type == DigestHash.SHA256:
       # Due to a tor bug sha256 digests are calculated from the
       # whole descriptor rather than ommiting the signature...
       #
       #   https://trac.torproject.org/projects/tor/ticket/28415
-      #
-      # Descriptors drop '=' hash padding from its fields (such
-      # as our server descriptor's extra-info-digest), so doing
-      # the same here so we match.
 
-      return base64.b64encode(hashlib.sha256(str(self)).digest()).rstrip('=')
+      return stem.descriptor._encode_digest(hashlib.sha256(str(self)), encoding)
     else:
       raise NotImplementedError('Extrainfo descriptor digests are only available in sha1 and sha256, not %s' % hash_type)
 
@@ -1013,13 +1018,13 @@ class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
       ('router-digest', _random_fingerprint()),
     ))
 
-  def digest(self, hash_type = DigestHash.SHA1):
-    if hash_type == DigestHash.SHA1:
+  def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX):
+    if hash_type == DigestHash.SHA1 and encoding == DigestEncoding.HEX:
       return self._digest
-    elif hash_type == DigestHash.SHA256:
+    elif hash_type == DigestHash.SHA256 and encoding == DigestEncoding.BASE64:
       return self.router_digest_sha256
     else:
-      raise NotImplementedError('Bridge extrainfo digests are only available in sha1 and sha256, not %s' % hash_type)
+      raise NotImplementedError('Bridge extrainfo digests are only available as sha1/hex and sha256/base64, not %s/%s' % (hash_type, encoding))
 
   def _required_fields(self):
     excluded_fields = [
diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py
index f4823c72..d459484b 100644
--- a/test/unit/descriptor/extrainfo_descriptor.py
+++ b/test/unit/descriptor/extrainfo_descriptor.py
@@ -54,7 +54,7 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
     self.assertEqual(900, desc.dir_write_history_interval)
     self.assertEqual(expected_signature, desc.signature)
     self.assertEqual('00A57A9AAB5EA113898E2DD02A755E31AFC27227', desc.digest(stem.descriptor.DigestHash.SHA1))
-    self.assertEqual('n2+wh6uM+lbKnhbkOog2jv9X5tPytlrFdO+I+auSmME', desc.digest(stem.descriptor.DigestHash.SHA256))
+    self.assertEqual('n2+wh6uM+lbKnhbkOog2jv9X5tPytlrFdO+I+auSmME', desc.digest(stem.descriptor.DigestHash.SHA256, stem.descriptor.DigestEncoding.BASE64))
     self.assertEqual([], desc.get_unrecognized_lines())
 
     # The read-history, write-history, dirreq-read-history, and





More information about the tor-commits mailing list