[tor-commits] [stem/master] Account for attribute types in hash and equality

atagar at torproject.org atagar at torproject.org
Thu Jun 7 22:22:27 UTC 2018


commit 4306013ab6e868e99553fbb78ed14ef51a896b7d
Author: Damian Johnson <atagar at torproject.org>
Date:   Thu Jun 7 15:20:16 2018 -0700

    Account for attribute types in hash and equality
    
    Proper equality checks and hash functions should take attribute types into
    account. Unsurprisingly this uncovered a few small bugs where we forgot to
    normalize constructor inputs.
---
 docs/change_log.rst   |  3 ++-
 stem/directory.py     |  6 ++---
 stem/exit_policy.py   |  2 ++
 stem/manual.py        | 22 ++++++++--------
 stem/util/__init__.py | 69 ++++++++++++++++++++++++++++++++++++---------------
 5 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index 944d3577..e735d6b2 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -22,7 +22,7 @@ Versioning
 
 Stem uses `semantic versioning <http://semver.org/>`_, which means that
 **versions consist of three numbers** (such as '**1.2.4**'). These are used to
-convey the kind of backward compatibility a release has...
+convey the kind of backward compatibility you can expect...
 
  * The first value is the **major version**. This changes infrequently, and
    indicates that backward incompatible changes have been made (such as the
@@ -80,6 +80,7 @@ The following are only available within Stem's `git repository
 
   * Fixed PyPy compatibility (:trac:`26207`)
   * Connection information from proc limited to 10,000 results
+  * Accouting for attribute types in most equality checks and hashes
 
  * **Website**
 
diff --git a/stem/directory.py b/stem/directory.py
index 5d1a352d..600c9721 100644
--- a/stem/directory.py
+++ b/stem/directory.py
@@ -352,19 +352,19 @@ class Fallback(Directory):
   .. versionadded:: 1.5.0
 
   .. versionchanged:: 1.7.0
-     Added the has_extrainfo, and header attributes which are part of
+     Added the has_extrainfo and header attributes which are part of
      the `second version of the fallback directories
      <https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html>`_.
 
   :var bool has_extrainfo: **True** if the relay should be able to provide
     extrainfo descriptors, **False** otherwise.
-  :var dict header: metadata about the fallback directory file this originated from
+  :var collections.OrderedDict header: metadata about the fallback directory file this originated from
   """
 
   def __init__(self, address = None, or_port = None, dir_port = None, fingerprint = None, nickname = None, has_extrainfo = False, orport_v6 = None, header = None):
     super(Fallback, self).__init__(address, or_port, dir_port, fingerprint, nickname, orport_v6)
     self.has_extrainfo = has_extrainfo
-    self.header = header if header else OrderedDict()
+    self.header = OrderedDict(header) if header else OrderedDict()
 
   @staticmethod
   def from_cache(path = FALLBACK_CACHE_PATH):
diff --git a/stem/exit_policy.py b/stem/exit_policy.py
index 27475858..0b956db9 100644
--- a/stem/exit_policy.py
+++ b/stem/exit_policy.py
@@ -656,6 +656,8 @@ class ExitPolicyRule(object):
     # policy ::= "accept[6]" exitpattern | "reject[6]" exitpattern
     # exitpattern ::= addrspec ":" portspec
 
+    rule = stem.util.str_tools._to_unicode(rule)
+
     self.is_accept = rule.startswith('accept')
     is_ipv6_only = rule.startswith('accept6') or rule.startswith('reject6')
 
diff --git a/stem/manual.py b/stem/manual.py
index f8e7ea96..47c16d42 100644
--- a/stem/manual.py
+++ b/stem/manual.py
@@ -359,11 +359,11 @@ class Manual(object):
   :var str synopsis: brief tor command usage
   :var str description: general description of what tor does
 
-  :var dict commandline_options: mapping of commandline arguments to their descripton
-  :var dict signals: mapping of signals tor accepts to their description
-  :var dict files: mapping of file paths to their description
+  :var collections.OrderedDict commandline_options: mapping of commandline arguments to their descripton
+  :var collections.OrderedDict signals: mapping of signals tor accepts to their description
+  :var collections.OrderedDict files: mapping of file paths to their description
 
-  :var dict config_options: :class:`~stem.manual.ConfigOption` tuples for tor configuration options
+  :var collections.OrderedDict config_options: :class:`~stem.manual.ConfigOption` tuples for tor configuration options
 
   :var str man_commit: latest tor commit editing the man page when this
     information was cached
@@ -374,10 +374,10 @@ class Manual(object):
     self.name = name
     self.synopsis = synopsis
     self.description = description
-    self.commandline_options = commandline_options
-    self.signals = signals
-    self.files = files
-    self.config_options = config_options
+    self.commandline_options = OrderedDict(commandline_options)
+    self.signals = OrderedDict(signals)
+    self.files = OrderedDict(files)
+    self.config_options = OrderedDict(config_options)
     self.man_commit = None
     self.stem_commit = None
     self.schema = None
@@ -479,9 +479,9 @@ class Manual(object):
       conf.get('name', ''),
       conf.get('synopsis', ''),
       conf.get('description', ''),
-      conf.get('commandline_options', {}),
-      conf.get('signals', {}),
-      conf.get('files', {}),
+      conf.get('commandline_options', OrderedDict()),
+      conf.get('signals', OrderedDict()),
+      conf.get('files', OrderedDict()),
       config_options,
     )
 
diff --git a/stem/util/__init__.py b/stem/util/__init__.py
index c2b4999f..f1a7be6a 100644
--- a/stem/util/__init__.py
+++ b/stem/util/__init__.py
@@ -24,11 +24,51 @@ __all__ = [
   'datetime_to_unix',
 ]
 
-# Python hashes booleans to zero or one. Usually this would be fine, but since
-# we use hashes for equality checks we need them to be something less common.
+# Beginning with Stem 1.7 we take attribute types into account when hashing
+# and checking equality. That is to say, if two Stem classes' attributes are
+# the same but use different types we no longer consider them to be equal.
+# For example...
+#
+#   s1 = Schedule(classes = ['Math', 'Art', 'PE'])
+#   s2 = Schedule(classes = ('Math', 'Art', 'PE'))
+#
+# Prior to Stem 1.7 s1 and s2 would be equal, but afterward unless Stem's
+# construcotr normalizes the types they won't.
+#
+# This change in behavior is the right thing to do but carries some risk, so
+# we provide the following constant to revert to legacy behavior. If you find
+# yourself using it them please let me know (https://www.atagar.com/contact/)
+# since this flag will go away in the future.
+
+HASH_TYPES = True
+
+
+def _hash_value(val):
+  if not HASH_TYPES:
+    my_hash = 0
+  else:
+    my_hash = hash(type(val))
+
+    # TODO: I hate doing this but until Python 2.x support is dropped we
+    # can't readily be strict about bytes vs unicode for attributes. This
+    # is because test assertions often use strings, and normalizing this
+    # would require wrapping most with to_unicode() calls.
+    #
+    # This hack will go away when we drop Python 2.x support.
+
+    if _is_str(val):
+      my_hash = hash(type(str))
+
+  if isinstance(val, (tuple, list)):
+    for v in val:
+      my_hash = (my_hash * 1024) + hash(v)
+  elif isinstance(val, dict):
+    for k in sorted(val.keys()):
+      my_hash = (my_hash * 2048) + (hash(k) * 1024) + hash(val[k])
+  else:
+    my_hash += hash(val)
 
-TRUE_HASH_VALUE = 4813749
-FALSE_HASH_VALUE = 5826450
+  return my_hash
 
 
 def _is_str(val):
@@ -91,23 +131,12 @@ def _hash_attr(obj, *attributes, **kwargs):
   :param class parent: parent object to include in the hash value
   """
 
-  my_hash = 0 if kwargs.get('parent') is None else kwargs.get('parent').__hash__(obj)
+  # TODO: deal with this parent thing
+
+  my_hash = hash(type(obj)) if kwargs.get('parent') is None else kwargs.get('parent').__hash__(obj)
 
   for attr in attributes:
-    my_hash *= 1024
-
-    attr_value = getattr(obj, attr)
-
-    if attr_value is not None:
-      if isinstance(attr_value, dict):
-        for k in sorted(attr_value.keys()):
-          my_hash = (my_hash + hash(k)) * 1024 + hash(attr_value[k])
-      elif isinstance(attr_value, (list, tuple)):
-        for entry in attr_value:
-          my_hash = (my_hash + hash(entry)) * 1024
-      elif isinstance(attr_value, bool):
-        my_hash += TRUE_HASH_VALUE if attr_value else FALSE_HASH_VALUE
-      else:
-        my_hash += hash(attr_value)
+    val = getattr(obj, attr)
+    my_hash = my_hash * 1024 + _hash_value(val)
 
   return my_hash



More information about the tor-commits mailing list