commit f993fccf0e1ad319920a3ae446d26f3ae8b5de43
Author: Damian Johnson <atagar@torproject.org>
Date:   Sat Oct 10 16:23:54 2015 -0700
Notification for especially high latency
We've had ongoing latency issues with Faravahar for a while now. This adds a notice-level check to better surface them, for example...
  NOTICE: Downloading the consensus from Faravahar took 74.5s. Median download time is 3.0s: maatuska => 9.6s, tor26 => 2.6s, longclaw => 2.8s, dizum => 2.7s, gabelmoo => 2.9s, moria1 => 3.0s, dannenberg => 3.3s, Faravahar => 74.5s
---
 consensus_health_checker.py | 17 +++++++++++++++++
 data/consensus_health.cfg   |  1 +
 2 files changed, 18 insertions(+)
diff --git a/consensus_health_checker.py b/consensus_health_checker.py index d762f74..d34db57 100755 --- a/consensus_health_checker.py +++ b/consensus_health_checker.py @@ -140,6 +140,11 @@ class Issue(object): attr.update({'authorities': ''})
return CONFIG['msg'][self._template].format(**attr).replace(' ', '_') + elif self._template == 'LATENCY': + attr = dict(self._attr) + attr.update({'authority': '', 'time_taken': '', 'median_time': '', 'authority_times': ''}) + + return CONFIG['msg'][self._template].format(**attr).replace(' ', '_') else: return self.get_message().replace(' ', '_')
@@ -775,9 +780,13 @@ def _get_documents(label, resource): validate = True, )
+ times_taken = {} + for authority, query in queries.items(): try: + start_time = time.time() documents[authority] = query.run()[0] + times_taken[authority] = time.time() - start_time except Exception as exc: if label == 'vote': # try to download the vote via the other authorities @@ -798,6 +807,14 @@ def _get_documents(label, resource):
issues.append(Issue(Runlevel.ERROR, 'AUTHORITY_UNAVAILABLE', fetch_type = label, authority = authority, url = query.download_url, error = exc, to = [authority]))
+ if label == 'consensus': + median_time = sorted(times_taken.values())[len(times_taken) / 2] + authority_times = ', '.join(['%s => %0.1fs' % (authority, time_taken) for authority, time_taken in times_taken.items()]) + + for authority, time_taken in times_taken.items(): + if time_taken > median_time * 5: + issues.append(Issue(Runlevel.NOTICE, 'LATENCY', authority = authority, time_taken = '%0.1fs' % time_taken, median_time = '%0.1fs' % median_time, authority_times = authority_times, to = [authority])) + return documents, issues
diff --git a/data/consensus_health.cfg b/data/consensus_health.cfg index 7e30fd0..2c5652a 100644 --- a/data/consensus_health.cfg +++ b/data/consensus_health.cfg @@ -1,5 +1,6 @@ # message templates for notifications we send
+msg LATENCY => Downloading the consensus from {authority} took {time_taken}. Median download time is {median_time}: {authority_times} msg MISSING_LATEST_CONSENSUS => The consensuses published by the following directory authorities are more than one hour old and therefore not fresh anymore: {authorities} msg CONSENSUS_METHOD_UNSUPPORTED => The following directory authorities do not support the consensus method that the consensus uses: {authorities} msg DIFFERENT_RECOMMENDED_VERSION => The following directory authorities recommend other {type} versions than the consensus: {differences}
tor-commits@lists.torproject.org