commit 874f41977592506834d8bcff2cd711b26402f9f0
Author: Damian Johnson <atagar@torproject.org>
Date: Thu Nov 15 09:29:56 2018 -0800
Add a digest DigestEncoding argument
Digests are defined by a hash type and encoding tuple. I was using the first to imply the second, but this doesn't always work. For example, the consensus cites base64 encoded sha1 server descriptor digests but stem provides hex encoded sha1s due to the following discussion with Karsten (subject: "Stem Sphinx Documentation", 6/7/12).
>> - Why does digest() return the base64-encoded digest, not the
>> hex-formatted one? Network statuses are the only documents in Tor using
>> base64 (or rather, a variant of it without trailing ='s), so it's easier
>> to convert those to hex than to convert everything else to base64. Now,
>> if you switch to hex, you'll only have to decide between lower-case and
>> upper-case. I think Tor and metrics-lib use upper-case hex in most places.
>
> I went with base64 because I thought that this was only useful for
> comparing with the network status. What uses the hex encoded digest?
The hex-encoded server descriptor digest is used as file name in metrics tarballs.
The (decoded) descriptor digest is used to verify the descriptor signature.
Another reason for hex-encoding the digest() result is that the digest() in extra-info descriptors should return the hex-encoded digest, too, or you wouldn't be able to compare it to the extra-info-digest line in server descriptors. Having both methods return a different encoding would be confusing.
Oh, and router-digest lines in sanitized bridge descriptors also contain the hex-encoded digest. You wouldn't want to convert that to base64 before writing it to the digest variable, nor would you want digest() and digest to return differently encoded digests.
As such I'm going to leave both the hashing and encoding up to our callers *and* cite all digest uses I know of in our digest method's pydoc. --- stem/descriptor/__init__.py | 50 +++++++++++++++++++++++----- stem/descriptor/extrainfo_descriptor.py | 41 +++++++++++++---------- test/unit/descriptor/extrainfo_descriptor.py | 2 +- 3 files changed, 66 insertions(+), 27 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index a35cd4e4..cf661c58 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -23,15 +23,28 @@ Package for parsing and processing descriptor data.
.. versionadded:: 1.8.0
- Hashing algorithm used by tor for descriptor digests. We drop trailing '=' - hash padding to match Tor. + Hash function used by tor for descriptor digests.
- =================== =========== - DigestHash Description - =================== =========== - SHA1 SHA1 hash - SHA256 SHA256 hash - =================== =========== + =========== =========== + DigestHash Description + =========== =========== + SHA1 SHA1 hash + SHA256 SHA256 hash + =========== =========== + +.. data:: DigestEncoding (enum) + + .. versionadded:: 1.8.0 + + Encoding of descriptor digests. + + ================= =========== + DigestEncoding Description + ================= =========== + RAW hash object + HEX uppercase hexidecimal encoding + BASE64 base64 encoding `without trailing '=' padding https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding`_ + ================= ===========
.. data:: DocumentHandler (enum)
@@ -137,6 +150,12 @@ DigestHash = stem.util.enum.UppercaseEnum( 'SHA256', )
+DigestEncoding = stem.util.enum.UppercaseEnum( + 'RAW', + 'HEX', + 'BASE64', +) + DocumentHandler = stem.util.enum.UppercaseEnum( 'ENTRIES', 'DOCUMENT', @@ -647,6 +666,21 @@ def _copy(default): return copy.copy(default)
+def _encode_digest(hash_value, encoding): + """ + Encodes a hash value with the given HashEncoding. + """ + + if encoding == DigestEncoding.RAW: + return hash_value + elif encoding == DigestEncoding.HEX: + return hash_value.hexdigest().upper() + elif encoding == DigestEncoding.BASE64: + return base64.b64encode(hash_value.digest()).rstrip('=') + else: + raise NotImplementedError('BUG: stem.descriptor._encode_digest should recognize all DigestEncoding, lacked %s' % encoding) + + class Descriptor(object): """ Common parent for all types of descriptors. diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index a374ea03..4319bb05 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -67,7 +67,6 @@ Extra-info descriptors are available from a few sources... ===================== =========== """
-import base64 import functools import hashlib import re @@ -81,6 +80,7 @@ from stem.descriptor import ( PGP_BLOCK_END, Descriptor, DigestHash, + DigestEncoding, create_signing_key, _descriptor_content, _read_until_keywords, @@ -868,18 +868,27 @@ class ExtraInfoDescriptor(Descriptor): else: self._entries = entries
- def digest(self, hash_type = DigestHash.SHA1): + def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX): """ - Provides the upper-case hex encoded sha1 of our content. This value is part - of the server descriptor entry for this relay. + Digest of this descriptor's content. These are referenced by... + + * **Server Descriptors** + + * Referer: :class:`~stem.descriptor.server_descriptor.ServerDescriptor` **extra_info_digest** attribute + * Format: **SHA1/HEX** + + * **Server Descriptors** + + * Referer: :class:`~stem.descriptor.server_descriptor.ServerDescriptor` **extra_info_sha256_digest** attribute + * Format: **SHA256/BASE64**
.. versionchanged:: 1.8.0 - Added the hash_type argument. + Added the hash_type and encoding arguments.
:param stem.descriptor.DigestHash hash_type: digest hashing algorithm + :param stem.descriptor.DigestEncoding encoding: digest encoding
- :returns: **str** with the upper-case hex digest value for this server - descriptor + :returns: **hashlib.HASH** or **str** based on our encoding argument """
raise NotImplementedError('Unsupported Operation: this should be implemented by the ExtraInfoDescriptor subclass') @@ -953,24 +962,20 @@ class RelayExtraInfoDescriptor(ExtraInfoDescriptor): return cls(cls.content(attr, exclude, sign, signing_key), validate = validate)
@lru_cache() - def digest(self, hash_type = DigestHash.SHA1): + def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX): if hash_type == DigestHash.SHA1: # our digest is calculated from everything except our signature
raw_content, ending = str(self), '\nrouter-signature\n' raw_content = stem.util.str_tools._to_bytes(raw_content[:raw_content.find(ending) + len(ending)]) - return hashlib.sha1(raw_content).hexdigest().upper() + return stem.descriptor._encode_digest(hashlib.sha1(raw_content), encoding) elif hash_type == DigestHash.SHA256: # Due to a tor bug sha256 digests are calculated from the # whole descriptor rather than ommiting the signature... # # https://trac.torproject.org/projects/tor/ticket/28415 - # - # Descriptors drop '=' hash padding from its fields (such - # as our server descriptor's extra-info-digest), so doing - # the same here so we match.
- return base64.b64encode(hashlib.sha256(str(self)).digest()).rstrip('=') + return stem.descriptor._encode_digest(hashlib.sha256(str(self)), encoding) else: raise NotImplementedError('Extrainfo descriptor digests are only available in sha1 and sha256, not %s' % hash_type)
@@ -1013,13 +1018,13 @@ class BridgeExtraInfoDescriptor(ExtraInfoDescriptor): ('router-digest', _random_fingerprint()), ))
- def digest(self, hash_type = DigestHash.SHA1): - if hash_type == DigestHash.SHA1: + def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX): + if hash_type == DigestHash.SHA1 and encoding == DigestEncoding.HEX: return self._digest - elif hash_type == DigestHash.SHA256: + elif hash_type == DigestHash.SHA256 and encoding == DigestEncoding.BASE64: return self.router_digest_sha256 else: - raise NotImplementedError('Bridge extrainfo digests are only available in sha1 and sha256, not %s' % hash_type) + raise NotImplementedError('Bridge extrainfo digests are only available as sha1/hex and sha256/base64, not %s/%s' % (hash_type, encoding))
def _required_fields(self): excluded_fields = [ diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py index f4823c72..d459484b 100644 --- a/test/unit/descriptor/extrainfo_descriptor.py +++ b/test/unit/descriptor/extrainfo_descriptor.py @@ -54,7 +54,7 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw self.assertEqual(900, desc.dir_write_history_interval) self.assertEqual(expected_signature, desc.signature) self.assertEqual('00A57A9AAB5EA113898E2DD02A755E31AFC27227', desc.digest(stem.descriptor.DigestHash.SHA1)) - self.assertEqual('n2+wh6uM+lbKnhbkOog2jv9X5tPytlrFdO+I+auSmME', desc.digest(stem.descriptor.DigestHash.SHA256)) + self.assertEqual('n2+wh6uM+lbKnhbkOog2jv9X5tPytlrFdO+I+auSmME', desc.digest(stem.descriptor.DigestHash.SHA256, stem.descriptor.DigestEncoding.BASE64)) self.assertEqual([], desc.get_unrecognized_lines())
# The read-history, write-history, dirreq-read-history, and
tor-commits@lists.torproject.org