[tor-commits] [stem/master] ControlMessage equality and hashing

atagar at torproject.org atagar at torproject.org
Tue Aug 7 22:19:30 UTC 2018


commit b48ce682ef1a5fc1d4237d106700ec929f49246b
Author: Damian Johnson <atagar at torproject.org>
Date:   Tue Aug 7 15:04:20 2018 -0700

    ControlMessage equality and hashing
    
    Interesting! Previously I implemented comparability for a single event type but
    that's all. I'm surprised we've gone this long without needing it.
    
    Messages have only three things we need to account for in the hash...
    
      * The message content (ie. the bytes we got from tor).
      * The message class, if we've casted ourselves to something.
      * The arrived_at timestamp if this is an event.
    
    That's it. Everything else is derived from those.
    
    Changing hash_attr()'s parent argument back to a class because a boolean makes
    it pretty likely we'll hit an invinite loop. In this case we hit it because we
    had the following object hierarchy...
    
      ControlMessage  <= has a hash method
      Event           <= has a hash method
      BandwidthEvent  <= our object type
    
    If our Event class' hash method calls super it will return the Event class, not
    ControlMessage, causing us to loop indefinitely. Changing hash_attr() so it
    takes an explicit reference for our parent instead.
---
 docs/change_log.rst                   |  1 +
 stem/__init__.py                      |  2 +-
 stem/directory.py                     |  4 ++--
 stem/response/__init__.py             | 17 +++++++++++---
 stem/response/events.py               | 43 ++++++++++-------------------------
 stem/util/__init__.py                 |  6 +++--
 test/unit/response/control_message.py | 24 +++++++++++++++++++
 7 files changed, 58 insertions(+), 39 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index 282a02c8..362e4e10 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -58,6 +58,7 @@ The following are only available within Stem's `git repository
   * Added the delivered_read, delivered_written, overhead_read, and overhead_written attributes to :class:`~stem.response.events.CircuitBandwidthEvent` (:spec:`fbb38ec`)
   * The *config* attribute of :class:`~stem.response.events.ConfChangedEvent` couldn't represent tor configuration options with multiple values. It has been replaced with new *changed* and *unset* attributes.
   * Replaced socket's :func:`~stem.socket.ControlPort.get_address`, :func:`~stem.socket.ControlPort.get_port`, and :func:`~stem.socket.ControlSocketFile.get_socket_path` with attributes
+  * :class:`~stem.response.ControlMessage` is now comparable and hashable
   * Removed 'raw' argument from :func:`~stem.socket.ControlSocket.send`
 
  * **Descriptors**
diff --git a/stem/__init__.py b/stem/__init__.py
index 6da73320..1679b0a0 100644
--- a/stem/__init__.py
+++ b/stem/__init__.py
@@ -601,7 +601,7 @@ class ORPort(Endpoint):
 
   def __hash__(self):
     if self._hash is None:
-      self._hash = stem.util._hash_attr(self, 'link_protocols', parent = True)
+      self._hash = stem.util._hash_attr(self, 'link_protocols', parent = Endpoint)
 
     return self._hash
 
diff --git a/stem/directory.py b/stem/directory.py
index 020ac696..5d243707 100644
--- a/stem/directory.py
+++ b/stem/directory.py
@@ -316,7 +316,7 @@ class Authority(Directory):
 
   def __hash__(self):
     if self._hash is None:
-      self._hash = stem.util._hash_attr(self, 'v3ident', 'is_bandwidth_authority', parent = True)
+      self._hash = stem.util._hash_attr(self, 'v3ident', 'is_bandwidth_authority', parent = Directory)
 
     return self._hash
 
@@ -521,7 +521,7 @@ class Fallback(Directory):
 
   def __hash__(self):
     if self._hash is None:
-      self._hash = stem.util._hash_attr(self, 'has_extrainfo', 'header', parent = True)
+      self._hash = stem.util._hash_attr(self, 'has_extrainfo', 'header', parent = Directory)
 
     return self._hash
 
diff --git a/stem/response/__init__.py b/stem/response/__init__.py
index 327be819..54eb9c44 100644
--- a/stem/response/__init__.py
+++ b/stem/response/__init__.py
@@ -16,9 +16,7 @@ Parses replies from the control socket.
     |- from_str - provides a ControlMessage for the given string
     |- is_ok - response had a 250 status
     |- content - provides the parsed message content
-    |- raw_content - unparsed socket data
-    |- __str__ - content stripped of protocol formatting
-    +- __iter__ - ControlLine entries for the content of the message
+    +- raw_content - unparsed socket data
 
   ControlLine - String subclass with methods for parsing controller responses.
     |- remainder - provides the unparsed content
@@ -36,6 +34,7 @@ import re
 import threading
 
 import stem.socket
+import stem.util
 import stem.util.str_tools
 
 __all__ = [
@@ -129,6 +128,9 @@ class ControlMessage(object):
   Message from the control socket. This is iterable and can be stringified for
   individual message components stripped of protocol formatting. Messages are
   never empty.
+
+  .. versionchanged:: 1.7.0
+     Implemented equality and hashing.
   """
 
   @staticmethod
@@ -299,6 +301,15 @@ class ControlMessage(object):
 
     return ControlLine(content)
 
+  def __hash__(self):
+    return stem.util._hash_attr(self, '_raw_content')
+
+  def __eq__(self, other):
+    return hash(self) == hash(other) if isinstance(other, ControlMessage) else False
+
+  def __ne__(self, other):
+    return not self == other
+
 
 class ControlLine(str):
   """
diff --git a/stem/response/events.py b/stem/response/events.py
index 5623d1a7..37b60b38 100644
--- a/stem/response/events.py
+++ b/stem/response/events.py
@@ -57,6 +57,7 @@ class Event(stem.response.ControlMessage):
 
     self.type = str(self).split()[0]
     self.arrived_at = arrived_at
+    self._hash = None
 
     # if we're a recognized event type then translate ourselves into that subclass
 
@@ -71,6 +72,12 @@ class Event(stem.response.ControlMessage):
 
     self._parse()
 
+  def __hash__(self):
+    if self._hash is None:
+      self._hash = stem.util._hash_attr(self, 'arrived_at', parent = stem.response.ControlMessage)
+
+    return self._hash
+
   def _parse_standard_attr(self):
     """
     Most events are of the form...
@@ -410,41 +417,15 @@ class CircuitEvent(Event):
     self._log_if_unrecognized('remote_reason', stem.CircClosureReason)
 
   def _compare(self, other, method):
+    # sorting circuit events by their identifier
+
     if not isinstance(other, CircuitEvent):
       return False
 
-    for attr in ('id', 'status', 'path', 'build_flags', 'purpose', 'hs_state', 'rend_query', 'created', 'reason', 'remote_reason', 'socks_username', 'socks_password'):
-      my_attr = getattr(self, attr)
-      other_attr = getattr(other, attr)
-
-      # Our id attribute is technically a string, but Tor conventionally uses
-      # ints. Attempt to handle as ints if that's the case so we get numeric
-      # ordering.
-
-      if attr == 'id' and my_attr and other_attr:
-        if my_attr.isdigit() and other_attr.isdigit():
-          my_attr = int(my_attr)
-          other_attr = int(other_attr)
-
-      if my_attr is None:
-        my_attr = ''
-
-      if other_attr is None:
-        other_attr = ''
-
-      if my_attr != other_attr:
-        return method(my_attr, other_attr)
-
-    return True
-
-  def __hash__(self):
-    return hash(str(self).strip())
-
-  def __eq__(self, other):
-    return self._compare(other, lambda s, o: s == o)
+    my_id = getattr(self, 'id')
+    their_id = getattr(other, 'id')
 
-  def __ne__(self, other):
-    return not self == other
+    return method(my_id, their_id) if my_id != their_id else method(hash(self), hash(other))
 
   def __gt__(self, other):
     return self._compare(other, lambda s, o: s > o)
diff --git a/stem/util/__init__.py b/stem/util/__init__.py
index cc40bd0a..f0c2397b 100644
--- a/stem/util/__init__.py
+++ b/stem/util/__init__.py
@@ -130,10 +130,12 @@ def _hash_attr(obj, *attributes, **kwargs):
 
   :param Object obj: object to be hashed
   :param list attributes: attribute names to take into account
-  :param bool parent: include parent's hash value
+  :param class parent: include parent's hash value
   """
 
-  my_hash = super(type(obj), obj).__hash__() if kwargs.get('parent') else 0
+  parent_class = kwargs.get('parent')
+
+  my_hash = parent_class.__hash__(obj) if parent_class else 0
   my_hash = my_hash * 1024 + hash(str(type(obj)))
 
   for attr in attributes:
diff --git a/test/unit/response/control_message.py b/test/unit/response/control_message.py
index 8b05d6a8..abf5debf 100644
--- a/test/unit/response/control_message.py
+++ b/test/unit/response/control_message.py
@@ -168,6 +168,30 @@ class TestControlMessage(unittest.TestCase):
     control_socket_file = control_socket.makefile()
     self.assertRaises(stem.SocketClosed, stem.socket.recv_message, control_socket_file)
 
+  def test_equality(self):
+    msg = stem.response.ControlMessage.from_str(EVENT_BW)
+    event_msg = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT')
+
+    # basic check for identical and differing events
+
+    self.assertEqual(msg, stem.response.ControlMessage.from_str(EVENT_BW))
+    self.assertNotEqual(msg, stem.response.ControlMessage.from_str(EVENT_CIRC_TIMEOUT))
+
+    # casting to different message types should cause us to mismatch
+
+    self.assertNotEqual(event_msg, msg)
+    stem.response.convert('EVENT', msg)
+    self.assertEqual(event_msg, msg)
+
+    # events also take into account when they were received
+
+    event1 = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT', arrived_at = 123)
+    event2 = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT', arrived_at = 456)
+    event3 = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT', arrived_at = 123)
+
+    self.assertNotEqual(event1, event2)
+    self.assertEqual(event1, event3)
+
   def _assert_message_parses(self, controller_reply):
     """
     Performs some basic sanity checks that a reply mirrors its parsed result.



More information about the tor-commits mailing list