commit a550d73178d6fc8158bb5ae518e719c0f81b8ae7 Author: juga0 juga@riseup.net Date: Fri Dec 21 05:19:54 2018 +0000
scanner: log backtrace when not progressing
When sbws stalls after the prioritization loop while waiting for thread results, give it a few minutes and, if it still has not progressed, log the backtrace of every thread. This way, if there is a bug or a deadlock, operators can help to debug it. --- sbws/core/scanner.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-)
diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py index 80251ec..9c7dff5 100644 --- a/sbws/core/scanner.py +++ b/sbws/core/scanner.py @@ -1,5 +1,8 @@ ''' Measure the relays. '''
+import sys +import threading + from ..lib.circuitbuilder import GapsCircuitBuilder as CB from ..lib.resultdump import ResultDump from ..lib.resultdump import ResultSuccess, ResultErrorCircuit @@ -27,6 +30,20 @@ end_event = Event() log = logging.getLogger(__name__)
+def dumpstacks(): + import pdb + import traceback + log.warning("sbws stop measuring relays, probably because of a bug. " + "Please, open a ticket in trac.torproject.org with this " + "backtrace.") + thread_id2name = {t.ident: t.name for t in threading.enumerate()} + for thread_id, stack in sys._current_frames().items(): + log.critical("Thread: %s(%d)", + thread_id2name.get(thread_id, ""), thread_id) + log.critical("Traceback: %s", "".join(traceback.format_stack(stack))) + pdb.set_trace() + + def timed_recv_from_server(session, dest, byte_range): ''' Request the **byte_range** from the URL at **dest**. If successful, return True and the time it took to download. Otherwise return False and an @@ -359,9 +376,16 @@ def run_speedtest(args, conf): while len(pending_results) >= max_pending_results: time.sleep(5) pending_results = [r for r in pending_results if not r.ready()] - while len(pending_results) > 0: + counter = 0 + # give it 3min, otherwise there's a bug or deadlock + while len(pending_results) > 0 and counter <= 36: + counter += 1 + log.debug("Number of pending measurement threads %s after " + "a prioritization loop.", len(pending_results)) time.sleep(5) pending_results = [r for r in pending_results if not r.ready()] + if counter > 36: + dumpstacks() loop_tstop = time.time() loop_tdelta = (loop_tstop - loop_tstart) / 60 log.debug("Measured %s relays in %s minutes", num_relays, loop_tdelta)
tor-commits@lists.torproject.org