commit 73fcfaf8ccbc2c1cb714f35fcc8e2823daf39808 Author: Damian Johnson atagar@torproject.org Date: Sat Aug 4 18:52:33 2012 -0700
GETINFO caching and logging
Caching static GETINFO results, short circuiting geoip lookups when the geoip db is unavailable, and logging the call runtimes. --- stem/control.py | 139 +++++++++++++++++++++++++++++++++++++++------- stem/response/getinfo.py | 18 ++++++ 2 files changed, 137 insertions(+), 20 deletions(-)
diff --git a/stem/control.py b/stem/control.py index 16bcb10..673261c 100644 --- a/stem/control.py +++ b/stem/control.py @@ -9,10 +9,13 @@ interacting at a higher level.
::
- from_port - Provides a Controller based on a port connection. - from_socket_file - Provides a Controller based on a socket file connection. - Controller - General controller class intended for direct use. + | |- from_port - Provides a Controller based on a port connection. + | +- from_socket_file - Provides a Controller based on a socket file connection. + | + |- is_caching_enabled - true if the controller has enabled caching + |- is_geoip_unavailable - true if we've discovered our geoip db to be unavailable + |- clear_cache - clears any cached results |- get_info - issues a GETINFO query for a parameter |- get_conf - gets the value of a configuration option |- get_conf_mapping - gets the values of multiple configuration options @@ -72,6 +75,24 @@ MAPPED_CONFIG_KEYS = { "hiddenserviceoptions": "HiddenServiceOptions" }
+# unchangeable GETINFO parameters +CACHEABLE_GETINFO_PARAMS = ( + 'version', + 'config-file', + 'exit-policy/default', + 'fingerprint', + 'config/names', + 'config/defaults', + 'info/names', + 'events/names', + 'features/names', + 'process/descriptor-limit', +) + +# number of sequential attempts before we decide that the Tor geoip database +# is unavailable +GEOIP_FAILURE_THRESHOLD = 5 + # TODO: The Thread's isAlive() method and theading's currentThread() was # changed to the more conventional is_alive() and current_thread() in python # 2.6 and above. We should use that when dropping python 2.5 compatability. @@ -454,6 +475,19 @@ class Controller(BaseController): from_port = staticmethod(from_port) from_socket_file = staticmethod(from_socket_file)
+ def __init__(self, control_socket, enable_caching = True): + super(Controller, self).__init__(control_socket) + + self._is_caching_enabled = enable_caching + self._request_cache = {} + + # number of sequential 'GETINFO ip-to-country/*' lookups that have failed + self._geoip_failure_count = 0 + + def connect(self): + super(Controller, self).connect() + self.clear_cache() + def close(self): # making a best-effort attempt to quit before detaching the socket if self.is_alive(): @@ -462,7 +496,38 @@ class Controller(BaseController):
super(Controller, self).close()
- def get_info(self, param, default = UNDEFINED): + def is_caching_enabled(self): + """ + True if caching has been enabled, False otherwise. + + :returns: bool to indicate if caching is enabled + """ + + return self._is_caching_enabled + + def is_geoip_unavailable(self): + """ + Provides True if we've concluded that our geoip database is unavailable, + False otherwise. This is determined by having our 'GETINFO ip-to-country/*' + lookups fail so this will default to False if we aren't making those + queries. + + Geoip failures will be untracked if caching is disabled. + + :returns: bool to indicate if we've concluded our geoip database to be unavailable or not + """ + + return self._geoip_failure_count >= GEOIP_FAILURE_THRESHOLD + + def clear_cache(self): + """ + Drops any cached results. + """ + + self._request_cache = {} + self._geoip_failure_count = 0 + + def get_info(self, params, default = UNDEFINED): """ Queries the control socket for the given GETINFO option. If provided a default then that's returned if the GETINFO option is undefined or the @@ -484,35 +549,69 @@ class Controller(BaseController): :class:`stem.socket.InvalidArguments` if the 'param' requested was invalid """
- # TODO: add caching? - # TODO: special geoip handling? - # TODO: add logging, including call runtime + start_time = time.time() + reply = {}
- if isinstance(param, str): + if isinstance(params, str): is_multiple = False - param = [param] + params = set([params]) else: + if not params: return {} is_multiple = True + params = set(params) + + # check for cached results + for param in list(params): + cache_key = "getinfo.%s" % param.lower() + + if cache_key in self._request_cache: + reply[param] = self._request_cache[cache_key] + params.remove(param) + elif param.startswith('ip-to-country/') and self.is_geoip_unavailable(): + # the geoip database already looks to be unavailable - abort the request + raise stem.socket.ProtocolError("Tor geoip database is unavailable") + + # if everything was cached then short circuit making the query + if not params: + log.debug("GETINFO %s (cache fetch)" % " ".join(reply.keys())) + + if is_multiple: return reply + else: return reply.values()[0]
try: - response = self.msg("GETINFO %s" % " ".join(param)) + response = self.msg("GETINFO %s" % " ".join(params)) stem.response.convert("GETINFO", response) + response.assert_matches(params) + reply.update(response.entries)
- # error if we got back different parameters than we requested - requested_params = set(param) - reply_params = set(response.entries.keys()) + if self.is_caching_enabled(): + for key, value in response.entries.items(): + key = key.lower() # make case insensitive + + if key in CACHEABLE_GETINFO_PARAMS: + self._request_cache["getinfo.%s" % key] = value + elif key.startswith('ip-to-country/'): + # both cacheable and means that we should reset the geoip failure count + self._request_cache["getinfo.%s" % key] = value + self._geoip_failure_count = -1
- if requested_params != reply_params: - requested_label = ", ".join(requested_params) - reply_label = ", ".join(reply_params) - - raise stem.socket.ProtocolError("GETINFO reply doesn't match the parameters that we requested. Queried '%s' but got '%s'." % (requested_label, reply_label)) + log.debug("GETINFO %s (runtime: %0.4f)" % (" ".join(params), time.time() - start_time))
if is_multiple: - return response.entries + return reply else: - return response.entries[param[0]] + return reply.values()[0] except stem.socket.ControllerError, exc: + # bump geoip failure count if... + # * we're caching results + # * this was solely a geoip lookup + # * we've never had a successful geoip lookup (failure count isn't -1) + + is_geoip_request = len(params) == 1 and list(params)[0].startswith('ip-to-country/') + + if is_geoip_request and self.is_caching_enabled() and self._geoip_failure_count != -1: + self._geoip_failure_count += 1 + if default == UNDEFINED: raise exc else: return default
diff --git a/stem/response/getinfo.py b/stem/response/getinfo.py index a2cce57..24aeb84 100644 --- a/stem/response/getinfo.py +++ b/stem/response/getinfo.py @@ -52,4 +52,22 @@ class GetInfoResponse(stem.response.ControlMessage): value = value[1:]
self.entries[key] = value + + def assert_matches(self, params): + """ + Checks if we match a given set of parameters, and raise a ProtocolError if not. + + :param set params: parameters to assert that we contain + + :raises: + * :class:`stem.socket.ProtocolError` if parameters don't match this response + """ + + reply_params = set(self.entries.keys()) + + if params != reply_params: + requested_label = ", ".join(params) + reply_label = ", ".join(reply_params) + + raise stem.socket.ProtocolError("GETINFO reply doesn't match the parameters that we requested. Queried '%s' but got '%s'." % (requested_label, reply_label))