commit ea37bc9f74dfa238dc6e94f2a98a762a7d27f93a Author: juga0 juga@riseup.net Date: Mon Feb 18 14:15:29 2019 +0000
scanner: Warn when there is no progress
measuring unique relays. Create new module heartbeat.
Closes: #28652 --- sbws/core/scanner.py | 15 ++++++++++++- sbws/lib/heartbeat.py | 46 ++++++++++++++++++++++++++++++++++++++++ sbws/lib/relaylist.py | 7 ++++++ tests/unit/lib/test_heartbeat.py | 21 ++++++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py index 24a975b..f6443e5 100644 --- a/sbws/core/scanner.py +++ b/sbws/core/scanner.py @@ -33,6 +33,7 @@ import requests import random
from .. import settings +from ..lib import heartbeat
rng = random.SystemRandom() log = logging.getLogger(__name__) @@ -478,6 +479,13 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump, measured.
""" + # Variable to count total progress in the last days: + # In case it is needed to see which relays are not being measured, + # store their fingerprint, not only their number. + measured_fp_set = set() + measured_percent = 0 + main_loop_tstart = time.monotonic() + # Set the time to wait for a thread to finish as the half of an HTTP # request timeout. # Do not start a new loop if sbws is stopping. @@ -503,7 +511,7 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump, [args, conf, destinations, circuit_builder, relay_list, target], {}, callback, callback_err) pending_results.append(async_result) - + measured_fp_set.add(async_result) # After the for has finished, the pool has queued all the relays # and pending_results has the list of all the AsyncResults. # It could also be obtained with pool._cache, which contains @@ -511,6 +519,11 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump, num_relays_to_measure = len(pending_results) wait_for_results(num_relays_to_measure, pending_results)
+ measured_percent = heartbeat.total_measured_percent( + measured_percent, relay_list.relays_fingerprints, measured_fp_set, + main_loop_tstart, conf.getpath('paths', 'state_fname') + ) + loop_tstop = time.time() loop_tdelta = (loop_tstop - loop_tstart) / 60 log.debug("Measured %s relays in %s minutes", num_relays, loop_tdelta) diff --git a/sbws/lib/heartbeat.py b/sbws/lib/heartbeat.py new file mode 100644 index 0000000..7dfa716 --- /dev/null +++ b/sbws/lib/heartbeat.py @@ -0,0 +1,46 @@ +""" +Classes and functions to implement a heartbeat system to monitor the progress. +""" +import logging +import time + +from ..util.state import State + + +log = logging.getLogger(__name__) + +# NOTE tech-debt: this could be tracked globally as a singleton +consensus_fp_set = set() + + +def total_measured_percent(measured_percent, relays_fingerprints, + measured_fp_set, main_loop_tstart, state_path): + """Returns the new percentage of the different relays that were measured. + + This way it can be known whether the scanner is making progress measuring + all the Network. + + Log the percentage, the number of relays measured and not measured, + the number of loops and the time elapsed since it started measuring. + """ + global consensus_fp_set + # NOTE: in a future refactor make State a singleton in __init__.py + state_dict = State(state_path) + loops_count = state_dict.get('recent_priority_list_count', 0) + + # Store all the relays seen in all the consensuses.
+ [consensus_fp_set.add(r) for r in relays_fingerprints] + + not_measured_fp_set = consensus_fp_set.difference(measured_fp_set) + main_loop_tdelta = (time.monotonic() - main_loop_tstart) / 60 + new_measured_percent = round( + len(measured_fp_set) / len(consensus_fp_set) * 100) + log.info("Run %s main loops.", loops_count) + log.info("Measured in total %s (%s%%) unique relays in %s minutes", + len(measured_fp_set), new_measured_percent, main_loop_tdelta) + log.info("%s relays still not measured.", len(not_measured_fp_set)) + # The case when it is equal will only happen when all the relays have been + # measured. + if (new_measured_percent <= measured_percent): + log.warning("There is no progress measuring relays!.") + return new_measured_percent diff --git a/sbws/lib/relaylist.py b/sbws/lib/relaylist.py index 6a66069..fcfbdea 100644 --- a/sbws/lib/relaylist.py +++ b/sbws/lib/relaylist.py @@ -355,6 +355,13 @@ class RelayList: def authorities(self): return self._relays_with_flag(Flag.AUTHORITY)
+ @property + def relays_fingerprints(self): + # Using relays instead of _relays, so that the list gets updated if + # needed, since this method is used to know which fingerprints are in + # the consensus. + return [r.fingerprint for r in self.relays] + def random_relay(self): return self.rng.choice(self.relays)
diff --git a/tests/unit/lib/test_heartbeat.py b/tests/unit/lib/test_heartbeat.py new file mode 100644 index 0000000..55573a8 --- /dev/null +++ b/tests/unit/lib/test_heartbeat.py @@ -0,0 +1,21 @@ +"""Unit tests for heartbeat""" +import logging +import time + +from sbws.lib import heartbeat + + +def test_total_measured_percent(conf, caplog): + measured_percent = 0 + measured_fp_set = set(['A', 'B']) + main_loop_tstart = time.monotonic() + relays_fingerprints = set(['A', 'B', 'C']) + + caplog.set_level(logging.INFO) + new_measured_percent = heartbeat.total_measured_percent( + measured_percent, relays_fingerprints, measured_fp_set, + main_loop_tstart, conf.getpath('paths', 'state_fname') + ) + assert new_measured_percent == 67 + caplog.records[1].getMessage().find("Measured in total 2 (67%)") + caplog.records[2].getMessage().find("1 relays still not measured")