[tor-commits] [stem/master] Adding python 2.x backport of @lru_cache

atagar at torproject.org
Mon Oct 7 00:41:29 UTC 2013


commit d9fbdc968479b0d413d1ff875ca19c855e7ce2ae
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Oct 6 17:32:10 2013 -0700

    Adding python 2.x backport of @lru_cache
    
    Python 3.2 added a memoization decorator to python's functools module. This is
    very, very handy, allowing us to avoid the caching boilerplate I do way too
    often...
    
      def get_foo(self):
        if self._foo is None:
          ... stuff to calculate self._foo...
    
        return self._foo
    
    With a memoization function this becomes...
    
      @lru_cache()
      def get_foo(self):
        ... stuff to calculate and return foo...
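
    The decorated function also picks up some instrumentation, for instance...

      get_foo.cache_info()   # e.g. CacheInfo(hits=3, misses=1, maxsize=100, currsize=1)
      get_foo.cache_clear()  # drops cached results and resets the counters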
    
    This is an MIT-licensed backport from...
    
      http://code.activestate.com/recipes/578078-py26-and-py30-backport-of-python-33s-lru-cache/
    
    Looking forward to when we require python 3.2 so we can use the builtin!
---
 stem/util/__init__.py  |    1 +
 stem/util/lru_cache.py |  180 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 181 insertions(+)

diff --git a/stem/util/__init__.py b/stem/util/__init__.py
index 8ec8eaf..dacd804 100644
--- a/stem/util/__init__.py
+++ b/stem/util/__init__.py
@@ -10,6 +10,7 @@ __all__ = [
   "connection",
   "enum",
   "log",
+  "lru_cache",
   "ordereddict",
   "proc",
   "system",
diff --git a/stem/util/lru_cache.py b/stem/util/lru_cache.py
new file mode 100644
index 0000000..5591862
--- /dev/null
+++ b/stem/util/lru_cache.py
@@ -0,0 +1,180 @@
+# Drop-in replacement for python 3.2's functools.lru_cache, from...
+# http://code.activestate.com/recipes/578078-py26-and-py30-backport-of-python-33s-lru-cache/
+#
+# ... which is under the MIT license. Stem users should *not* rely upon this
+# module. It will be removed when we only support python 3.2 and above.
+
+"""
+Memoization decorator that caches a function's return value. If later called
+with the same arguments then the cached value is returned rather than
+reevaluated.
+
+This is a python 2.x port of `functools.lru_cache
+<http://docs.python.org/3/library/functools.html#functools.lru_cache>`_. If
+using python 3.2 or later you should use that instead.
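+
+Example usage (illustrative)::
+
+  from stem.util.lru_cache import lru_cache
+
+  @lru_cache()
+  def factorial(n):
+    return n * factorial(n - 1) if n else 1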
+"""
+
+from collections import namedtuple
+from functools import update_wrapper
+from threading import RLock
+
+_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
+
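+# wraps the key tuple and caches its hash, so a key used for a lookup, an
+# insertion, and possibly a later eviction is only hashed once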
+class _HashedSeq(list):
+    __slots__ = 'hashvalue'
+
+    def __init__(self, tup, hash=hash):
+        self[:] = tup
+        self.hashvalue = hash(tup)
+
+    def __hash__(self):
+        return self.hashvalue
+
+def _make_key(args, kwds, typed,
+              kwd_mark = (object(),),
+              fasttypes = {int, str, frozenset, type(None)},
+              sorted=sorted, tuple=tuple, type=type, len=len):
+    'Make a cache key from optionally typed positional and keyword arguments'
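+    # e.g. f(1, 2, a=3) with typed=False yields the key
+    # (1, 2, kwd_mark[0], 'a', 3); the object() sentinel keeps positional
+    # and keyword arguments from colliding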
+    key = args
+    if kwds:
+        sorted_items = sorted(kwds.items())
+        key += kwd_mark
+        for item in sorted_items:
+            key += item
+    if typed:
+        key += tuple(type(v) for v in args)
+        if kwds:
+            key += tuple(type(v) for k, v in sorted_items)
+    elif len(key) == 1 and type(key[0]) in fasttypes:
+        return key[0]
+    return _HashedSeq(key)
+
+def lru_cache(maxsize=100, typed=False):
+    """Least-recently-used cache decorator.
+
+    If *maxsize* is set to None, the LRU features are disabled and the cache
+    can grow without bound.
+
+    If *typed* is True, arguments of different types will be cached separately.
+    For example, f(3.0) and f(3) will be treated as distinct calls with
+    distinct results.
+
+    Arguments to the cached function must be hashable.
+
+    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
+    f.cache_info().  Clear the cache and statistics with f.cache_clear().
+    Access the underlying function with f.__wrapped__.
+
+    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
+
+    """
+
+    # Users should only access the lru_cache through its public API:
+    #       cache_info, cache_clear, and f.__wrapped__
+    # The internals of the lru_cache are encapsulated for thread safety and
+    # to allow the implementation to change (including a possible C version).
+
+    def decorating_function(user_function):
+
+        cache = dict()
+        stats = [0, 0]                  # make statistics updateable non-locally
+        HITS, MISSES = 0, 1             # names for the stats fields
+        make_key = _make_key
+        cache_get = cache.get           # bound method to lookup key or return None
+        _len = len                      # localize the global len() function
+        lock = RLock()                  # because linked list updates aren't thread-safe
+        root = []                       # root of the circular doubly linked list
+        root[:] = [root, root, None, None]      # initialize by pointing to self
+        nonlocal_root = [root]                  # make updateable non-locally
+        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields
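+        # each entry is a link [PREV, NEXT, KEY, RESULT] on a circular doubly
+        # linked list threaded through root: root[NEXT] is the oldest entry
+        # and root[PREV] the most recently used, so hits, insertions, and
+        # evictions are all O(1)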
+
+        if maxsize == 0:
+
+            def wrapper(*args, **kwds):
+                # no caching, just do a statistics update after a successful call
+                result = user_function(*args, **kwds)
+                stats[MISSES] += 1
+                return result
+
+        elif maxsize is None:
+
+            def wrapper(*args, **kwds):
+                # simple caching without ordering or size limit
+                key = make_key(args, kwds, typed)
+                result = cache_get(key, root)   # root used here as a unique not-found sentinel
+                if result is not root:
+                    stats[HITS] += 1
+                    return result
+                result = user_function(*args, **kwds)
+                cache[key] = result
+                stats[MISSES] += 1
+                return result
+
+        else:
+
+            def wrapper(*args, **kwds):
+                # size limited caching that tracks accesses by recency
+                key = make_key(args, kwds, typed) if kwds or typed else args
+                with lock:
+                    link = cache_get(key)
+                    if link is not None:
+                        # record recent use of the key by moving it to the front of the list
+                        root, = nonlocal_root
+                        link_prev, link_next, key, result = link
+                        link_prev[NEXT] = link_next
+                        link_next[PREV] = link_prev
+                        last = root[PREV]
+                        last[NEXT] = root[PREV] = link
+                        link[PREV] = last
+                        link[NEXT] = root
+                        stats[HITS] += 1
+                        return result
+                result = user_function(*args, **kwds)
+                with lock:
+                    root, = nonlocal_root
+                    if key in cache:
+                        # getting here means that this same key was added to the
+                        # cache while the lock was released.  since the link
+                        # update is already done, we need only return the
+                        # computed result and update the count of misses.
+                        pass
+                    elif _len(cache) >= maxsize:
+                        # use the old root to store the new key and result
+                        oldroot = root
+                        oldroot[KEY] = key
+                        oldroot[RESULT] = result
+                        # empty the oldest link and make it the new root
+                        root = nonlocal_root[0] = oldroot[NEXT]
+                        oldkey = root[KEY]
+                        oldvalue = root[RESULT]
+                        root[KEY] = root[RESULT] = None
+                        # now update the cache dictionary for the new links
+                        del cache[oldkey]
+                        cache[key] = oldroot
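+                        # (recycling the root link means eviction allocates
+                        # nothing: the displaced oldest entry simply becomes
+                        # the new, empty root)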
+                    else:
+                        # put result in a new link at the front of the list
+                        last = root[PREV]
+                        link = [last, root, key, result]
+                        last[NEXT] = root[PREV] = cache[key] = link
+                    stats[MISSES] += 1
+                return result
+
+        def cache_info():
+            """Report cache statistics"""
+            with lock:
+                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
+
+        def cache_clear():
+            """Clear the cache and cache statistics"""
+            with lock:
+                cache.clear()
+                root = nonlocal_root[0]
+                root[:] = [root, root, None, None]
+                stats[:] = [0, 0]
+
+        wrapper.__wrapped__ = user_function
+        wrapper.cache_info = cache_info
+        wrapper.cache_clear = cache_clear
+        return update_wrapper(wrapper, user_function)
+
+    return decorating_function


