[tor-commits] [sbws/master] scanner: log backtrace when not progressing

juga at torproject.org juga at torproject.org
Wed Jan 23 15:17:22 UTC 2019


commit a550d73178d6fc8158bb5ae518e719c0f81b8ae7
Author: juga0 <juga at riseup.net>
Date:   Fri Dec 21 05:19:54 2018 +0000

    scanner: log backtrace when not progressing
    
    When sbws stalls after the prioritization loop while waiting for
    thread results, give it a few minutes and then log the backtrace.
    This way, if there is a bug or deadlock, operators can help to
    debug it.
---
 sbws/core/scanner.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py
index 80251ec..9c7dff5 100644
--- a/sbws/core/scanner.py
+++ b/sbws/core/scanner.py
@@ -1,5 +1,8 @@
 ''' Measure the relays. '''
 
+import sys
+import threading
+
 from ..lib.circuitbuilder import GapsCircuitBuilder as CB
 from ..lib.resultdump import ResultDump
 from ..lib.resultdump import ResultSuccess, ResultErrorCircuit
@@ -27,6 +30,20 @@ end_event = Event()
 log = logging.getLogger(__name__)
 
 
+def dumpstacks():
+    ''' Log a warning and every thread's stack, then drop into pdb. '''
+    import pdb
+    import traceback
+    log.warning("sbws stop measuring relays, probably because of a bug. "
+                "Please, open a ticket in trac.torproject.org with this "
+                "backtrace.")
+    thread_id2name = {t.ident: t.name for t in threading.enumerate()}
+    for thread_id, stack in sys._current_frames().items():
+        log.critical("Thread: %s(%d)\n%s", thread_id2name.get(thread_id, ""),
+                     thread_id, "".join(traceback.format_stack(stack)))
+    pdb.set_trace()
+
+
 def timed_recv_from_server(session, dest, byte_range):
     ''' Request the **byte_range** from the URL at **dest**. If successful,
     return True and the time it took to download. Otherwise return False and an
@@ -359,9 +376,16 @@ def run_speedtest(args, conf):
             while len(pending_results) >= max_pending_results:
                 time.sleep(5)
                 pending_results = [r for r in pending_results if not r.ready()]
-        while len(pending_results) > 0:
+        counter = 0
+        # give it 3min, otherwise there's a bug or deadlock
+        while len(pending_results) > 0 and counter <= 36:
+            counter += 1
+            log.debug("Number of pending measurement threads %s after "
+                      "a prioritization loop.", len(pending_results))
             time.sleep(5)
             pending_results = [r for r in pending_results if not r.ready()]
+        if counter > 36:
+            dumpstacks()
         loop_tstop = time.time()
         loop_tdelta = (loop_tstop - loop_tstart) / 60
         log.debug("Measured %s relays in %s minutes", num_relays, loop_tdelta)





More information about the tor-commits mailing list