commit b48ce682ef1a5fc1d4237d106700ec929f49246b Author: Damian Johnson atagar@torproject.org Date: Tue Aug 7 15:04:20 2018 -0700
ControlMessage equality and hashing
Interesting! Previously I implemented comparability for a single event type but that's all. I'm surprised we've gone this long without needing it.
Messages have only three things we need to account for in the hash...
* The message content (ie. the bytes we got from tor). * The message class, if we've casted ourselves to something. * The arrived_at timestamp if this is an event.
That's it. Everything else is derived from those.
Changing hash_attr()'s parent argument back to a class because a boolean makes it pretty likely we'll hit an invinite loop. In this case we hit it because we had the following object hierarchy...
ControlMessage <= has a hash method Event <= has a hash method BandwidthEvent <= our object type
If our Event class' hash method calls super it will return the Event class, not ControlMessage, causing us to loop indefinitely. Changing hash_attr() so it takes an explicit reference for our parent instead. --- docs/change_log.rst | 1 + stem/__init__.py | 2 +- stem/directory.py | 4 ++-- stem/response/__init__.py | 17 +++++++++++--- stem/response/events.py | 43 ++++++++++------------------------- stem/util/__init__.py | 6 +++-- test/unit/response/control_message.py | 24 +++++++++++++++++++ 7 files changed, 58 insertions(+), 39 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst index 282a02c8..362e4e10 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -58,6 +58,7 @@ The following are only available within Stem's `git repository * Added the delivered_read, delivered_written, overhead_read, and overhead_written attributes to :class:`~stem.response.events.CircuitBandwidthEvent` (:spec:`fbb38ec`) * The *config* attribute of :class:`~stem.response.events.ConfChangedEvent` couldn't represent tor configuration options with multiple values. It has been replaced with new *changed* and *unset* attributes. * Replaced socket's :func:`~stem.socket.ControlPort.get_address`, :func:`~stem.socket.ControlPort.get_port`, and :func:`~stem.socket.ControlSocketFile.get_socket_path` with attributes + * :class:`~stem.response.ControlMessage` is now comparable and hashable * Removed 'raw' argument from :func:`~stem.socket.ControlSocket.send`
* **Descriptors** diff --git a/stem/__init__.py b/stem/__init__.py index 6da73320..1679b0a0 100644 --- a/stem/__init__.py +++ b/stem/__init__.py @@ -601,7 +601,7 @@ class ORPort(Endpoint):
def __hash__(self): if self._hash is None: - self._hash = stem.util._hash_attr(self, 'link_protocols', parent = True) + self._hash = stem.util._hash_attr(self, 'link_protocols', parent = Endpoint)
return self._hash
diff --git a/stem/directory.py b/stem/directory.py index 020ac696..5d243707 100644 --- a/stem/directory.py +++ b/stem/directory.py @@ -316,7 +316,7 @@ class Authority(Directory):
def __hash__(self): if self._hash is None: - self._hash = stem.util._hash_attr(self, 'v3ident', 'is_bandwidth_authority', parent = True) + self._hash = stem.util._hash_attr(self, 'v3ident', 'is_bandwidth_authority', parent = Directory)
return self._hash
@@ -521,7 +521,7 @@ class Fallback(Directory):
def __hash__(self): if self._hash is None: - self._hash = stem.util._hash_attr(self, 'has_extrainfo', 'header', parent = True) + self._hash = stem.util._hash_attr(self, 'has_extrainfo', 'header', parent = Directory)
return self._hash
diff --git a/stem/response/__init__.py b/stem/response/__init__.py index 327be819..54eb9c44 100644 --- a/stem/response/__init__.py +++ b/stem/response/__init__.py @@ -16,9 +16,7 @@ Parses replies from the control socket. |- from_str - provides a ControlMessage for the given string |- is_ok - response had a 250 status |- content - provides the parsed message content - |- raw_content - unparsed socket data - |- __str__ - content stripped of protocol formatting - +- __iter__ - ControlLine entries for the content of the message + +- raw_content - unparsed socket data
ControlLine - String subclass with methods for parsing controller responses. |- remainder - provides the unparsed content @@ -36,6 +34,7 @@ import re import threading
import stem.socket +import stem.util import stem.util.str_tools
__all__ = [ @@ -129,6 +128,9 @@ class ControlMessage(object): Message from the control socket. This is iterable and can be stringified for individual message components stripped of protocol formatting. Messages are never empty. + + .. versionchanged:: 1.7.0 + Implemented equality and hashing. """
@staticmethod @@ -299,6 +301,15 @@ class ControlMessage(object):
return ControlLine(content)
+ def __hash__(self): + return stem.util._hash_attr(self, '_raw_content') + + def __eq__(self, other): + return hash(self) == hash(other) if isinstance(other, ControlMessage) else False + + def __ne__(self, other): + return not self == other +
class ControlLine(str): """ diff --git a/stem/response/events.py b/stem/response/events.py index 5623d1a7..37b60b38 100644 --- a/stem/response/events.py +++ b/stem/response/events.py @@ -57,6 +57,7 @@ class Event(stem.response.ControlMessage):
self.type = str(self).split()[0] self.arrived_at = arrived_at + self._hash = None
# if we're a recognized event type then translate ourselves into that subclass
@@ -71,6 +72,12 @@ class Event(stem.response.ControlMessage):
self._parse()
+ def __hash__(self): + if self._hash is None: + self._hash = stem.util._hash_attr(self, 'arrived_at', parent = stem.response.ControlMessage) + + return self._hash + def _parse_standard_attr(self): """ Most events are of the form... @@ -410,41 +417,15 @@ class CircuitEvent(Event): self._log_if_unrecognized('remote_reason', stem.CircClosureReason)
def _compare(self, other, method): + # sorting circuit events by their identifier + if not isinstance(other, CircuitEvent): return False
- for attr in ('id', 'status', 'path', 'build_flags', 'purpose', 'hs_state', 'rend_query', 'created', 'reason', 'remote_reason', 'socks_username', 'socks_password'): - my_attr = getattr(self, attr) - other_attr = getattr(other, attr) - - # Our id attribute is technically a string, but Tor conventionally uses - # ints. Attempt to handle as ints if that's the case so we get numeric - # ordering. - - if attr == 'id' and my_attr and other_attr: - if my_attr.isdigit() and other_attr.isdigit(): - my_attr = int(my_attr) - other_attr = int(other_attr) - - if my_attr is None: - my_attr = '' - - if other_attr is None: - other_attr = '' - - if my_attr != other_attr: - return method(my_attr, other_attr) - - return True - - def __hash__(self): - return hash(str(self).strip()) - - def __eq__(self, other): - return self._compare(other, lambda s, o: s == o) + my_id = getattr(self, 'id') + their_id = getattr(other, 'id')
- def __ne__(self, other): - return not self == other + return method(my_id, their_id) if my_id != their_id else method(hash(self), hash(other))
def __gt__(self, other): return self._compare(other, lambda s, o: s > o) diff --git a/stem/util/__init__.py b/stem/util/__init__.py index cc40bd0a..f0c2397b 100644 --- a/stem/util/__init__.py +++ b/stem/util/__init__.py @@ -130,10 +130,12 @@ def _hash_attr(obj, *attributes, **kwargs):
:param Object obj: object to be hashed :param list attributes: attribute names to take into account - :param bool parent: include parent's hash value + :param class parent: include parent's hash value """
- my_hash = super(type(obj), obj).__hash__() if kwargs.get('parent') else 0 + parent_class = kwargs.get('parent') + + my_hash = parent_class.__hash__(obj) if parent_class else 0 my_hash = my_hash * 1024 + hash(str(type(obj)))
for attr in attributes: diff --git a/test/unit/response/control_message.py b/test/unit/response/control_message.py index 8b05d6a8..abf5debf 100644 --- a/test/unit/response/control_message.py +++ b/test/unit/response/control_message.py @@ -168,6 +168,30 @@ class TestControlMessage(unittest.TestCase): control_socket_file = control_socket.makefile() self.assertRaises(stem.SocketClosed, stem.socket.recv_message, control_socket_file)
+ def test_equality(self): + msg = stem.response.ControlMessage.from_str(EVENT_BW) + event_msg = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT') + + # basic check for identical and differing events + + self.assertEqual(msg, stem.response.ControlMessage.from_str(EVENT_BW)) + self.assertNotEqual(msg, stem.response.ControlMessage.from_str(EVENT_CIRC_TIMEOUT)) + + # casting to different message types should cause us to mismatch + + self.assertNotEqual(event_msg, msg) + stem.response.convert('EVENT', msg) + self.assertEqual(event_msg, msg) + + # events also take into account when they were received + + event1 = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT', arrived_at = 123) + event2 = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT', arrived_at = 456) + event3 = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT', arrived_at = 123) + + self.assertNotEqual(event1, event2) + self.assertEqual(event1, event3) + def _assert_message_parses(self, controller_reply): """ Performs some basic sanity checks that a reply mirrors its parsed result.