[tor-commits] [stem/master] Using @lru_cache for exit policies

Tue Oct 8 07:50:17 UTC 2013

commit 19cca3a04b9b02e94d22ebb877af3ca2ca9ad5e4
Author: Damian Johnson <atagar at torproject.org>
Date:   Mon Oct 7 21:14:00 2013 -0700

    Using @lru_cache for exit policies
    
    The ExitPolicy and ExitPolicyRule classes are a perfect fit for @lru_cache.
    They're read-only classes that already do a fair bit of caching. The annotation
    lets us avoid doing this ourselves.
---
 stem/exit_policy.py             |  250 +++++++++++++++++----------------------
 stem/util/lru_cache.py          |   10 +-
 test/unit/exit_policy/policy.py |   22 ----
 3 files changed, 114 insertions(+), 168 deletions(-)

diff --git a/stem/exit_policy.py b/stem/exit_policy.py
index 17f3508..930c8bb 100644
--- a/stem/exit_policy.py
+++ b/stem/exit_policy.py
@@ -56,9 +56,16 @@ exiting to a destination is permissible or not. For instance...
   ============ ===========
 """
 
+import stem.prereq
 import stem.util.connection
 import stem.util.enum
 
+try:
+  # added in python 3.2
+  from collections import lru_cache
+except ImportError:
+  from stem.util.lru_cache import lru_cache
+
 AddressType = stem.util.enum.Enum(("WILDCARD", "Wildcard"), ("IPv4", "IPv4"), ("IPv6", "IPv6"))
 
 # Addresses aliased by the 'private' policy. From the tor man page...
@@ -77,21 +84,6 @@ PRIVATE_ADDRESSES = (
 )
 
 
-# TODO: The ExitPolicyRule could easily be a mutable class if we did the
-# following...
-#
-# * Provided setter methods that acquired an RLock which also wrapped all of
-#   our current methods to provide thread safety.
-#
-# * Reset our derived attributes (self._addr_bin, self._mask_bin, and
-#   self._str_representation) when we changed something that it was based on.
-#
-# That said, I'm not sure if this is entirely desirable since for most use
-# cases we *want* the caller to have an immutable ExitPolicy (since it
-# reflects something they... well, can't modify). However, I can think of
-# some use cases where we might want to construct custom policies. Maybe make
-# it a CustomExitPolicyRule subclass?
-
 def get_config_policy(rules):
   """
   Converts an ExitPolicy found in a torrc to a proper exit pattern. This
@@ -155,12 +147,20 @@ class ExitPolicy(object):
       if not isinstance(rule, (bytes, unicode, ExitPolicyRule)):
         raise TypeError("Exit policy rules can only contain strings or ExitPolicyRules, got a %s (%s)" % (type(rule), rules))
 
-    self._rules = None          # lazily loaded series of ExitPolicyRule
-    self._input_rules = rules   # input rules, only kept until self._rules is set
+    # Unparsed representation of the rules we were constructed with. Our
+    # _get_rules() method consumes this to provide ExitPolicyRule instances.
+    # This is lazily evaluated so we don't need to actually parse the exit
+    # policy if it's never used.
+
+    self._input_rules = rules
+
+    # Result when no rules apply. According to the spec policies default to 'is
+    # allowed', but our microdescriptor policy subclass might want to change
+    # this.
+
     self._is_allowed_default = True
-    self._summary_representation = None
-    self._can_exit_to_cache = {}
 
+  @lru_cache()
   def can_exit_to(self, address = None, port = None, strict = False):
     """
     Checks if this policy allows exiting to a given destination or not. If the
@@ -175,18 +175,13 @@ class ExitPolicy(object):
     :returns: **True** if exiting to this destination is allowed, **False** otherwise
     """
 
-    if not (address, port, strict) in self._can_exit_to_cache:
-      result = self._is_allowed_default
-
-      for rule in self._get_rules():
-        if rule.is_match(address, port, strict):
-          result = rule.is_accept
-          break
-
-      self._can_exit_to_cache[(address, port, strict)] = result
+    for rule in self._get_rules():
+      if rule.is_match(address, port, strict):
+        return rule.is_accept
 
-    return self._can_exit_to_cache[(address, port, strict)]
+    return self._is_allowed_default
 
+  @lru_cache()
   def is_exiting_allowed(self):
     """
     Provides **True** if the policy allows exiting whatsoever, **False**
@@ -194,6 +189,7 @@ class ExitPolicy(object):
     """
 
     rejected_ports = set()
+
     for rule in self._get_rules():
       if rule.is_accept:
         for port in xrange(rule.min_port, rule.max_port + 1):
@@ -207,6 +203,7 @@ class ExitPolicy(object):
 
     return self._is_allowed_default
 
+  @lru_cache()
   def summary(self):
     """
     Provides a short description of our policy chain, similar to a
@@ -227,131 +224,110 @@ class ExitPolicy(object):
     :returns: **str** with a concise summary for our policy
     """
 
-    if self._summary_representation is None:
-      # determines if we're a white-list or blacklist
-      is_whitelist = not self._is_allowed_default
+    # determines if we're a white-list or blacklist
+    is_whitelist = not self._is_allowed_default
 
-      for rule in self._get_rules():
-        if rule.is_address_wildcard() and rule.is_port_wildcard():
-          is_whitelist = not rule.is_accept
-          break
+    for rule in self._get_rules():
+      if rule.is_address_wildcard() and rule.is_port_wildcard():
+        is_whitelist = not rule.is_accept
+        break
 
-      # Iterates over the policies and adds the the ports we'll return (ie,
-      # allows if a white-list and rejects if a blacklist). Regardless of a
-      # port's allow/reject policy, all further entries with that port are
-      # ignored since policies respect the first matching policy.
+    # Iterates over the policies and adds the the ports we'll return (ie,
+    # allows if a white-list and rejects if a blacklist). Regardless of a
+    # port's allow/reject policy, all further entries with that port are
+    # ignored since policies respect the first matching policy.
 
-      display_ports, skip_ports = [], set()
+    display_ports, skip_ports = [], set()
 
-      for rule in self._get_rules():
-        if not rule.is_address_wildcard():
-          continue
-        elif rule.is_port_wildcard():
-          break
+    for rule in self._get_rules():
+      if not rule.is_address_wildcard():
+        continue
+      elif rule.is_port_wildcard():
+        break
 
-        for port in xrange(rule.min_port, rule.max_port + 1):
-          if port in skip_ports:
-            continue
+      for port in xrange(rule.min_port, rule.max_port + 1):
+        if port in skip_ports:
+          continue
 
-          # if accept + white-list or reject + blacklist then add
-          if rule.is_accept == is_whitelist:
-            display_ports.append(port)
+        # if accept + white-list or reject + blacklist then add
+        if rule.is_accept == is_whitelist:
+          display_ports.append(port)
 
-          # all further entries with this port should be ignored
-          skip_ports.add(port)
+        # all further entries with this port should be ignored
+        skip_ports.add(port)
 
-      # convert port list to a list of ranges (ie, ['1-3'] rather than [1, 2, 3])
-      if display_ports:
-        display_ranges, temp_range = [], []
-        display_ports.sort()
-        display_ports.append(None)  # ending item to include last range in loop
+    # convert port list to a list of ranges (ie, ['1-3'] rather than [1, 2, 3])
+    if display_ports:
+      display_ranges, temp_range = [], []
+      display_ports.sort()
+      display_ports.append(None)  # ending item to include last range in loop
 
-        for port in display_ports:
-          if not temp_range or temp_range[-1] + 1 == port:
-            temp_range.append(port)
+      for port in display_ports:
+        if not temp_range or temp_range[-1] + 1 == port:
+          temp_range.append(port)
+        else:
+          if len(temp_range) > 1:
+            display_ranges.append("%i-%i" % (temp_range[0], temp_range[-1]))
           else:
-            if len(temp_range) > 1:
-              display_ranges.append("%i-%i" % (temp_range[0], temp_range[-1]))
-            else:
-              display_ranges.append(str(temp_range[0]))
-
-            temp_range = [port]
-      else:
-        # everything for the inverse
-        is_whitelist = not is_whitelist
-        display_ranges = ["1-65535"]
+            display_ranges.append(str(temp_range[0]))
 
-      # constructs the summary string
-      label_prefix = "accept " if is_whitelist else "reject "
-
-      self._summary_representation = (label_prefix + ", ".join(display_ranges)).strip()
-
-    return self._summary_representation
-
-  def _set_default_allowed(self, is_allowed_default):
-    """
-    Generally policies end with either an 'reject \*:\*' or 'accept \*:\*'
-    policy, but if it doesn't then is_allowed_default will determine the
-    default response for our :meth:`~stem.exit_policy.ExitPolicy.can_exit_to`
-    method.
-
-    Our default, and tor's, is **True**.
+          temp_range = [port]
+    else:
+      # everything for the inverse
+      is_whitelist = not is_whitelist
+      display_ranges = ["1-65535"]
 
-    :param bool is_allowed_default:
-      :meth:`~stem.exit_policy.ExitPolicy.can_exit_to` default when no rules
-      apply
-    """
+    # constructs the summary string
+    label_prefix = "accept " if is_whitelist else "reject "
 
-    self._is_allowed_default = is_allowed_default
-    self._can_exit_to_cache = {}
+    return (label_prefix + ", ".join(display_ranges)).strip()
 
+  @lru_cache()
   def _get_rules(self):
-    if self._rules is None:
-      rules = []
-      is_all_accept, is_all_reject = True, True
-
-      for rule in self._input_rules:
-        if isinstance(rule, (bytes, unicode)):
-          rule = ExitPolicyRule(rule.strip())
+    rules = []
+    is_all_accept, is_all_reject = True, True
 
-        if rule.is_accept:
-          is_all_reject = False
-        else:
-          is_all_accept = False
+    for rule in self._input_rules:
+      if isinstance(rule, (bytes, unicode)):
+        rule = ExitPolicyRule(rule.strip())
 
-        rules.append(rule)
+      if rule.is_accept:
+        is_all_reject = False
+      else:
+        is_all_accept = False
 
-        if rule.is_address_wildcard() and rule.is_port_wildcard():
-          break  # this is a catch-all, no reason to include more
+      rules.append(rule)
 
-      # If we only have one kind of entry *and* end with a wildcard then
-      # we might as well use the simpler version. For instance...
-      #
-      #   reject *:80, reject *:443, reject *:*
-      #
-      # ... could also be represented as simply...
-      #
-      #   reject *:*
-      #
-      # This mostly comes up with reject-all policies because the
-      # 'reject private:*' appends an extra seven rules that have no
-      # effect.
+      if rule.is_address_wildcard() and rule.is_port_wildcard():
+        break  # this is a catch-all, no reason to include more
 
-      if rules and (rules[-1].is_address_wildcard() and rules[-1].is_port_wildcard()):
-        if is_all_accept:
-          rules = [ExitPolicyRule("accept *:*")]
-        elif is_all_reject:
-          rules = [ExitPolicyRule("reject *:*")]
+    # If we only have one kind of entry *and* end with a wildcard then
+    # we might as well use the simpler version. For instance...
+    #
+    #   reject *:80, reject *:443, reject *:*
+    #
+    # ... could also be represented as simply...
+    #
+    #   reject *:*
+    #
+    # This mostly comes up with reject-all policies because the
+    # 'reject private:*' appends an extra seven rules that have no
+    # effect.
 
-      self._rules = rules
-      self._input_rules = None
+    if rules and (rules[-1].is_address_wildcard() and rules[-1].is_port_wildcard()):
+      if is_all_accept:
+        rules = [ExitPolicyRule("accept *:*")]
+      elif is_all_reject:
+        rules = [ExitPolicyRule("reject *:*")]
 
-    return self._rules
+    self._input_rules = None
+    return rules
 
   def __iter__(self):
     for rule in self._get_rules():
       yield rule
 
+  @lru_cache()
   def __str__(self):
     return ', '.join([str(rule) for rule in self._get_rules()])
 
@@ -430,7 +406,7 @@ class MicroExitPolicy(ExitPolicy):
       rules.append(MicroExitPolicyRule(self.is_accept, int(min_port), int(max_port)))
 
     super(MicroExitPolicy, self).__init__(*rules)
-    self._set_default_allowed(not self.is_accept)
+    self._is_allowed_default = not self.is_accept
 
   def __str__(self):
     return self._policy
@@ -505,12 +481,6 @@ class ExitPolicyRule(object):
     self._apply_addrspec(rule, addrspec)
     self._apply_portspec(rule, portspec)
 
-    # The integer representation of our mask and masked address. These are
-    # lazily loaded and used by our is_match() method to compare ourselves to
-    # other addresses.
-
-    self._mask_bin = self._addr_bin = None
-
     # Lazily loaded string representation of our policy.
 
     self._str_representation = None
@@ -700,21 +670,17 @@ class ExitPolicyRule(object):
 
     return self._str_representation
 
+  @lru_cache()
   def _get_mask_bin(self):
     # provides an integer representation of our mask
 
-    if self._mask_bin is None:
-      self._mask_bin = int(stem.util.connection._get_address_binary(self.get_mask(False)), 2)
-
-    return self._mask_bin
+    return int(stem.util.connection._get_address_binary(self.get_mask(False)), 2)
 
+  @lru_cache()
   def _get_address_bin(self):
     # provides an integer representation of our address
 
-    if self._addr_bin is None:
-      self._addr_bin = int(stem.util.connection._get_address_binary(self.address), 2) & self._mask_bin
-
-    return self._addr_bin
+    return int(stem.util.connection._get_address_binary(self.address), 2) & self._get_mask_bin()
 
   def _apply_addrspec(self, rule, addrspec):
     # Parses the addrspec...
diff --git a/stem/util/lru_cache.py b/stem/util/lru_cache.py
index 5591862..a0b8f1b 100644
--- a/stem/util/lru_cache.py
+++ b/stem/util/lru_cache.py
@@ -20,6 +20,7 @@ from threading import RLock
 
 _CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
 
+
 class _HashedSeq(list):
     __slots__ = 'hashvalue'
 
@@ -30,10 +31,11 @@ class _HashedSeq(list):
     def __hash__(self):
         return self.hashvalue
 
+
 def _make_key(args, kwds, typed,
-             kwd_mark = (object(),),
-             fasttypes = {int, str, frozenset, type(None)},
-             sorted=sorted, tuple=tuple, type=type, len=len):
+              kwd_mark = (object(),),
+              fasttypes = {int, str, frozenset, type(None)},
+              sorted=sorted, tuple=tuple, type=type, len=len):
     'Make a cache key from optionally typed positional and keyword arguments'
     key = args
     if kwds:
@@ -49,6 +51,7 @@ def _make_key(args, kwds, typed,
         return key[0]
     return _HashedSeq(key)
 
+
 def lru_cache(maxsize=100, typed=False):
     """Least-recently-used cache decorator.
 
@@ -146,7 +149,6 @@ def lru_cache(maxsize=100, typed=False):
                         # empty the oldest link and make it the new root
                         root = nonlocal_root[0] = oldroot[NEXT]
                         oldkey = root[KEY]
-                        oldvalue = root[RESULT]
                         root[KEY] = root[RESULT] = None
                         # now update the cache dictionary for the new links
                         del cache[oldkey]
diff --git a/test/unit/exit_policy/policy.py b/test/unit/exit_policy/policy.py
index 8659b68..161bae0 100644
--- a/test/unit/exit_policy/policy.py
+++ b/test/unit/exit_policy/policy.py
@@ -47,28 +47,6 @@ class TestExitPolicy(unittest.TestCase):
     policy = ExitPolicy(*"reject *:80, reject *:443, reject *:*".split(","))
     self.assertEquals(ExitPolicy("reject *:*"), policy)
 
-  def test_set_default_allowed(self):
-    policy = ExitPolicy('reject *:80', 'accept *:443')
-
-    # our default for being allowed defaults to True
-    self.assertFalse(policy.can_exit_to("75.119.206.243", 80))
-    self.assertTrue(policy.can_exit_to("75.119.206.243", 443))
-    self.assertTrue(policy.can_exit_to("75.119.206.243", 999))
-
-    policy._set_default_allowed(False)
-    self.assertFalse(policy.can_exit_to("75.119.206.243", 80))
-    self.assertTrue(policy.can_exit_to("75.119.206.243", 443))
-    self.assertFalse(policy.can_exit_to("75.119.206.243", 999))
-
-    # Our is_exiting_allowed() is also influcenced by this flag if we lack any
-    # 'accept' rules.
-
-    policy = ExitPolicy()
-    self.assertTrue(policy.is_exiting_allowed())
-
-    policy._set_default_allowed(False)
-    self.assertFalse(policy.is_exiting_allowed())
-
   def test_can_exit_to(self):
     # Basic sanity test for our can_exit_to() method. Most of the interesting
     # use cases (ip masks, wildcards, etc) are covered by the ExitPolicyRule