[tor-commits] [doctor/master] Notification for especially high latency

atagar at torproject.org atagar at torproject.org
Sun Oct 11 00:23:44 UTC 2015


commit f993fccf0e1ad319920a3ae446d26f3ae8b5de43
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Oct 10 16:23:54 2015 -0700

    Notification for especially high latency
    
    We've had ongoing latency issues with Faravahar for a while now. Adding a
    notice-level check to better surface this...
    
      NOTICE: Downloading the consensus from Faravahar took 74.5s. Median download
      time is 3.0s: maatuska => 9.6s, tor26 => 2.6s, longclaw => 2.8s, dizum =>
      2.7s, gabelmoo => 2.9s, moria1 => 3.0s, dannenberg => 3.3s, Faravahar =>
      74.5s
---
 consensus_health_checker.py |   17 +++++++++++++++++
 data/consensus_health.cfg   |    1 +
 2 files changed, 18 insertions(+)

diff --git a/consensus_health_checker.py b/consensus_health_checker.py
index d762f74..d34db57 100755
--- a/consensus_health_checker.py
+++ b/consensus_health_checker.py
@@ -140,6 +140,11 @@ class Issue(object):
       attr.update({'authorities': ''})
 
       return CONFIG['msg'][self._template].format(**attr).replace(' ', '_')
+    elif self._template == 'LATENCY':
+      attr = dict(self._attr)
+      attr.update({'authority': '', 'time_taken': '', 'median_time': '', 'authority_times': ''})
+
+      return CONFIG['msg'][self._template].format(**attr).replace(' ', '_')
     else:
       return self.get_message().replace(' ', '_')
 
@@ -775,9 +780,13 @@ def _get_documents(label, resource):
       validate = True,
     )
 
+  times_taken = {}
+
   for authority, query in queries.items():
     try:
+      start_time = time.time()
       documents[authority] = query.run()[0]
+      times_taken[authority] = time.time() - start_time
     except Exception as exc:
       if label == 'vote':
         # try to download the vote via the other authorities
@@ -798,6 +807,14 @@ def _get_documents(label, resource):
 
       issues.append(Issue(Runlevel.ERROR, 'AUTHORITY_UNAVAILABLE', fetch_type = label, authority = authority, url = query.download_url, error = exc, to = [authority]))
 
+  if label == 'consensus':
+    median_time = sorted(times_taken.values())[len(times_taken) / 2]
+    authority_times = ', '.join(['%s => %0.1fs' % (authority, time_taken) for authority, time_taken in times_taken.items()])
+
+    for authority, time_taken in times_taken.items():
+      if time_taken > median_time * 5:
+        issues.append(Issue(Runlevel.NOTICE, 'LATENCY', authority = authority, time_taken = '%0.1fs' % time_taken, median_time = '%0.1fs' % median_time, authority_times = authority_times, to = [authority]))
+
   return documents, issues
 
 
diff --git a/data/consensus_health.cfg b/data/consensus_health.cfg
index 7e30fd0..2c5652a 100644
--- a/data/consensus_health.cfg
+++ b/data/consensus_health.cfg
@@ -1,5 +1,6 @@
 # message templates for notifications we send
 
+msg LATENCY => Downloading the consensus from {authority} took {time_taken}. Median download time is {median_time}: {authority_times}
 msg MISSING_LATEST_CONSENSUS => The consensuses published by the following directory authorities are more than one hour old and therefore not fresh anymore: {authorities}
 msg CONSENSUS_METHOD_UNSUPPORTED => The following directory authorities do not support the consensus method that the consensus uses: {authorities}
 msg DIFFERENT_RECOMMENDED_VERSION => The following directory authorities recommend other {type} versions than the consensus: {differences}



More information about the tor-commits mailing list