[tor-commits] [stem/master] GETINFO caching and logging

atagar at torproject.org atagar at torproject.org
Sun Aug 5 03:02:57 UTC 2012


commit 73fcfaf8ccbc2c1cb714f35fcc8e2823daf39808
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Aug 4 18:52:33 2012 -0700

    GETINFO caching and logging
    
    Caching static GETINFO results, short circuiting geoip lookups when the geoip
    db is unavailable, and logging the call runtimes.
---
 stem/control.py          |  139 +++++++++++++++++++++++++++++++++++++++-------
 stem/response/getinfo.py |   18 ++++++
 2 files changed, 137 insertions(+), 20 deletions(-)

diff --git a/stem/control.py b/stem/control.py
index 16bcb10..673261c 100644
--- a/stem/control.py
+++ b/stem/control.py
@@ -9,10 +9,13 @@ interacting at a higher level.
 
 ::
 
-  from_port - Provides a Controller based on a port connection.
-  from_socket_file - Provides a Controller based on a socket file connection.
-  
   Controller - General controller class intended for direct use.
+    | |- from_port - Provides a Controller based on a port connection.
+    | +- from_socket_file - Provides a Controller based on a socket file connection.
+    |
+    |- is_caching_enabled - true if the controller has enabled caching
+    |- is_geoip_unavailable - true if we've discovered our geoip db to be unavailable
+    |- clear_cache - clears any cached results
     |- get_info - issues a GETINFO query for a parameter
     |- get_conf - gets the value of a configuration option
     |- get_conf_mapping - gets the values of multiple configuration options
@@ -72,6 +75,24 @@ MAPPED_CONFIG_KEYS = {
   "hiddenserviceoptions": "HiddenServiceOptions"
 }
 
+# unchangeable GETINFO parameters
+CACHEABLE_GETINFO_PARAMS = (
+  'version',
+  'config-file',
+  'exit-policy/default',
+  'fingerprint',
+  'config/names',
+  'config/defaults',
+  'info/names',
+  'events/names',
+  'features/names',
+  'process/descriptor-limit',
+)
+
+# number of sequential attempts before we decide that the Tor geoip database
+# is unavailable
+GEOIP_FAILURE_THRESHOLD = 5
+
 # TODO: The Thread's isAlive() method and theading's currentThread() was
 # changed to the more conventional is_alive() and current_thread() in python
 # 2.6 and above. We should use that when dropping python 2.5 compatability.
@@ -454,6 +475,19 @@ class Controller(BaseController):
   from_port = staticmethod(from_port)
   from_socket_file = staticmethod(from_socket_file)
   
+  def __init__(self, control_socket, enable_caching = True):
+    super(Controller, self).__init__(control_socket)
+    
+    self._is_caching_enabled = enable_caching
+    self._request_cache = {}
+    
+    # number of sequential 'GETINFO ip-to-country/*' lookups that have failed
+    self._geoip_failure_count = 0
+  
+  def connect(self):
+    super(Controller, self).connect()
+    self.clear_cache()
+  
   def close(self):
     # making a best-effort attempt to quit before detaching the socket
     if self.is_alive():
@@ -462,7 +496,38 @@ class Controller(BaseController):
     
     super(Controller, self).close()
   
-  def get_info(self, param, default = UNDEFINED):
+  def is_caching_enabled(self):
+    """
+    True if caching has been enabled, False otherwise.
+    
+    :returns: bool to indicate if caching is enabled
+    """
+    
+    return self._is_caching_enabled
+  
+  def is_geoip_unavailable(self):
+    """
+    Provides True if we've concluded that our geoip database is unavailable,
+    False otherwise. This is determined by having our 'GETINFO ip-to-country/*'
+    lookups fail so this will default to False if we aren't making those
+    queries.
+    
+    Geoip failures will be untracked if caching is disabled.
+    
+    :returns: bool to indicate if we've concluded our geoip database to be unavailable or not
+    """
+    
+    return self._geoip_failure_count >= GEOIP_FAILURE_THRESHOLD
+  
+  def clear_cache(self):
+    """
+    Drops any cached results.
+    """
+    
+    self._request_cache = {}
+    self._geoip_failure_count = 0
+  
+  def get_info(self, params, default = UNDEFINED):
     """
     Queries the control socket for the given GETINFO option. If provided a
     default then that's returned if the GETINFO option is undefined or the
@@ -484,35 +549,69 @@ class Controller(BaseController):
       :class:`stem.socket.InvalidArguments` if the 'param' requested was invalid
     """
     
-    # TODO: add caching?
-    # TODO: special geoip handling?
-    # TODO: add logging, including call runtime
+    start_time = time.time()
+    reply = {}
     
-    if isinstance(param, str):
+    if isinstance(params, str):
       is_multiple = False
-      param = [param]
+      params = set([params])
     else:
+      if not params: return {}
       is_multiple = True
+      params = set(params)
+    
+    # check for cached results
+    for param in list(params):
+      cache_key = "getinfo.%s" % param.lower()
+      
+      if cache_key in self._request_cache:
+        reply[param] = self._request_cache[cache_key]
+        params.remove(param)
+      elif param.startswith('ip-to-country/') and self.is_geoip_unavailable():
+        # the geoip database already looks to be unavailable - abort the request
+        raise stem.socket.ProtocolError("Tor geoip database is unavailable")
+    
+    # if everything was cached then short circuit making the query
+    if not params:
+      log.debug("GETINFO %s (cache fetch)" % " ".join(reply.keys()))
+      
+      if is_multiple: return reply
+      else: return reply.values()[0]
     
     try:
-      response = self.msg("GETINFO %s" % " ".join(param))
+      response = self.msg("GETINFO %s" % " ".join(params))
       stem.response.convert("GETINFO", response)
+      response.assert_matches(params)
+      reply.update(response.entries)
       
-      # error if we got back different parameters than we requested
-      requested_params = set(param)
-      reply_params = set(response.entries.keys())
+      if self.is_caching_enabled():
+        for key, value in response.entries.items():
+          key = key.lower() # make case insensitive
+          
+          if key in CACHEABLE_GETINFO_PARAMS:
+            self._request_cache["getinfo.%s" % key] = value
+          elif key.startswith('ip-to-country/'):
+            # both cacheable and means that we should reset the geoip failure count
+            self._request_cache["getinfo.%s" % key] = value
+            self._geoip_failure_count = -1
       
-      if requested_params != reply_params:
-        requested_label = ", ".join(requested_params)
-        reply_label = ", ".join(reply_params)
-        
-        raise stem.socket.ProtocolError("GETINFO reply doesn't match the parameters that we requested. Queried '%s' but got '%s'." % (requested_label, reply_label))
+      log.debug("GETINFO %s (runtime: %0.4f)" % (" ".join(params), time.time() - start_time))
       
       if is_multiple:
-        return response.entries
+        return reply
       else:
-        return response.entries[param[0]]
+        return reply.values()[0]
     except stem.socket.ControllerError, exc:
+      # bump geoip failure count if...
+      # * we're caching results
+      # * this was solely a geoip lookup
+      # * we've never had a successful geoip lookup (failure count isn't -1)
+      
+      is_geoip_request = len(params) == 1 and list(params)[0].startswith('ip-to-country/')
+      
+      if is_geoip_request and self.is_caching_enabled() and self._geoip_failure_count != -1:
+        self._geoip_failure_count += 1
+      
       if default == UNDEFINED: raise exc
       else: return default
   
diff --git a/stem/response/getinfo.py b/stem/response/getinfo.py
index a2cce57..24aeb84 100644
--- a/stem/response/getinfo.py
+++ b/stem/response/getinfo.py
@@ -52,4 +52,22 @@ class GetInfoResponse(stem.response.ControlMessage):
         value = value[1:]
       
       self.entries[key] = value
+  
+  def assert_matches(self, params):
+    """
+    Checks if we match a given set of parameters, and raise a ProtocolError if not.
+    
+    :param set params: parameters to assert that we contain
+    
+    :raises:
+      * :class:`stem.socket.ProtocolError` if parameters don't match this response
+    """
+    
+    reply_params = set(self.entries.keys())
+    
+    if params != reply_params:
+      requested_label = ", ".join(params)
+      reply_label = ", ".join(reply_params)
+      
+      raise stem.socket.ProtocolError("GETINFO reply doesn't match the parameters that we requested. Queried '%s' but got '%s'." % (requested_label, reply_label))
 





More information about the tor-commits mailing list