[tor-commits] [sbws/master] destination: record consecutive failures

juga at torproject.org juga at torproject.org
Wed Feb 27 09:11:30 UTC 2019


commit 84e5d82cb99f06c37eb26039e59aaba04a6041d0
Author: juga0 <juga at riseup.net>
Date:   Thu Dec 20 15:26:33 2018 +0000

    destination: record consecutive failures
    
    Add methods to store consecutive destination failures and retrieve
    the destinations that are still functional.
    Since destinations can fail because of Tor circuits, it's not count
    individual failures but consecutives one.
    Also exit with error if there are no functional destinations left.
    The maximum number of consecuitve failures is set to 10, but it
    may need to be changed depending on the percentage of circuits and
    requests that fail.
---
 DEPLOY.rst                                     |  2 +
 docs/source/examples/sbws.example.ini          |  7 +++-
 docs/source/man_sbws.ini.rst                   |  5 +++
 sbws/core/scanner.py                           |  9 +++--
 sbws/globals.py                                |  5 +++
 sbws/lib/destination.py                        | 53 +++++++++++++++++++++++++-
 tests/integration/lib/test_destination.py      | 38 ++++++++++++++++++
 tests/integration/lib/test_relayprioritizer.py |  1 +
 8 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/DEPLOY.rst b/DEPLOY.rst
index 1a1ea76..a1ab4dd 100644
--- a/DEPLOY.rst
+++ b/DEPLOY.rst
@@ -38,6 +38,8 @@ or `<INSTALL.html>`_  (local build or Read the Docs).
 
 To run the ``scanner`` it is mandatory to create a configuration file with at
 least one ``destination``.
+It is recommended to set several ``destination``s so that the ``scanner`` can
+continue if one fails.
 
 If ``sbws`` is installed from the Debian package, then create a file in
 ``/etc/sbws/sbws.ini`` like in the following example:
diff --git a/docs/source/examples/sbws.example.ini b/docs/source/examples/sbws.example.ini
index 1325bc8..2f0a6bd 100644
--- a/docs/source/examples/sbws.example.ini
+++ b/docs/source/examples/sbws.example.ini
@@ -4,7 +4,12 @@
 nickname = sbws_default
 
 [destinations]
-# a destination can be disabled changing `on` by `off`
+# With several destinations, the scanner can continue even if some of them
+# fail, which can be caused by a network problem on their side.
+# If all of them fail, the scanner will stop, which
+# will happen if there is network problem on the scanner side.
+
+# A destination can be disabled changing `on` by `off`
 foo = on
 
 [destinations.foo]
diff --git a/docs/source/man_sbws.ini.rst b/docs/source/man_sbws.ini.rst
index 8064e72..8886d62 100644
--- a/docs/source/man_sbws.ini.rst
+++ b/docs/source/man_sbws.ini.rst
@@ -58,6 +58,11 @@ paths
     (Default ~/.sbws/log)
 
 destinations
+
+  It is required to set at least one destination for the scanner to run.
+  It is recommended to set several destinations so that the scanner can
+  continue if one fails.
+
   STR = {on, off}
     Name of destination. It is a name for the Web server from where to
     download files in order to measure bandwidths.
diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py
index dd4198c..4e981fd 100644
--- a/sbws/core/scanner.py
+++ b/sbws/core/scanner.py
@@ -244,11 +244,14 @@ def measure_relay(args, conf, destinations, cb, rl, relay):
         cb.controller, conf.getfloat('general', 'http_timeout'))
     # Pick a destionation
     dest = destinations.next()
+    # If there is no any destination at this point, it can not continue.
     if not dest:
         # XXX: this should return a ResultError
-        log.debug('Unable to get destination to measure %s %s',
-                  relay.nickname, relay.fingerprint)
-        return None
+        log.critical("There are not any functional destinations.\n"
+                     "It is recommended to set several destinations so that "
+                     "the scanner can continue if one fails.")
+        # This should raise an error so that the caller can close the pool.
+        exit(1)
     # Pick a relay to help us measure the given relay. If the given relay is an
     # exit, then pick a non-exit. Otherwise pick an exit.
     helper = None
diff --git a/sbws/globals.py b/sbws/globals.py
index a40322c..3f128fb 100644
--- a/sbws/globals.py
+++ b/sbws/globals.py
@@ -108,6 +108,11 @@ HTTP_GET_HEADERS = {
     'Accept-Encoding': 'identity',
 }
 DESTINATION_VERIFY_CERTIFICATE = True
+# This number might need adjusted depending on the percentage of circuits and
+# HTTP requests failures.
+# While the scanner can not recover from some/all failing destionations,
+# set a big number so that it continues trying.
+MAXIMUM_NUMBER_DESTINATION_FAILURES = 100
 
 
 def fail_hard(*a, **kw):
diff --git a/sbws/lib/destination.py b/sbws/lib/destination.py
index 468b3f6..302ac3f 100644
--- a/sbws/lib/destination.py
+++ b/sbws/lib/destination.py
@@ -11,6 +11,8 @@ from sbws.globals import DESTINATION_VERIFY_CERTIFICATE
 import sbws.util.stem as stem_utils
 import sbws.util.requests as requests_utils
 
+from ..globals import MAXIMUM_NUMBER_DESTINATION_FAILURES
+
 log = logging.getLogger(__name__)
 
 
@@ -83,21 +85,32 @@ def connect_to_destination_over_circuit(dest, circ_id, session, cont, max_dl):
             head = session.head(dest.url, verify=dest.verify)
         except (requests.exceptions.ConnectionError,
                 requests.exceptions.ReadTimeout) as e:
+            dest.set_failure()
             return False, 'Could not connect to {} over circ {} {}: {}'.format(
                 dest.url, circ_id, stem_utils.circuit_str(cont, circ_id), e)
         finally:
             stem_utils.remove_event_listener(cont, listener)
     if head.status_code != requests.codes.ok:
+        dest.set_failure()
         return False, error_prefix + 'we expected HTTP code '\
             '{} not {}'.format(requests.codes.ok, head.status_code)
     if 'content-length' not in head.headers:
+        dest.set_failure()
         return False, error_prefix + 'we except the header Content-Length '\
-                'to exist in the response'
+            'to exist in the response'
     content_length = int(head.headers['content-length'])
     if max_dl > content_length:
+        dest.set_failure()
         return False, error_prefix + 'our maximum configured download size '\
             'is {} but the content is only {}'.format(max_dl, content_length)
     log.debug('Connected to %s over circuit %s', dest.url, circ_id)
+    # Any failure connecting to the destination will call set_failure,
+    # which will set `failed` to True and count consecutives failures.
+    # It can not be set at the start, to be able to know if it failed a
+    # a previous time, which is checked by set_failure.
+    # Future improvement: use a list to count consecutive failures
+    # or calculate it from the results.
+    dest.failed = False
     return True, {'content_length': content_length}
 
 
@@ -107,6 +120,38 @@ class Destination:
         u = urlparse(url)
         self._url = u
         self._verify = verify
+        # Flag to record whether this destination failed in the last
+        # measurement.
+        # Failures can happen if:
+        # - an HTTPS request can not be made over Tor
+        # (which might be the relays fault, not the destination being
+        # unreachable)
+        # - the destination does not support HTTP Range requests.
+        self.failed = False
+        self.consecutive_failures = 0
+
+    @property
+    def is_functional(self):
+        """
+        Returns True if there has not been a number consecutive measurements.
+        Otherwise warn about it and return False.
+
+        """
+        if self.consecutive_failures > MAXIMUM_NUMBER_DESTINATION_FAILURES:
+            log.warning("Destination %s is not functional. Please check that "
+                        "it is correct.", self._url)
+            return False
+        return True
+
+    def set_failure(self):
+        """Set failed to True and increase the number of consecutive failures.
+        Only if it also failed in the previous measuremnt.
+
+        """
+        # if it failed in the last measurement
+        if self.failed:
+            self.consecutive_failures += 1
+        self.failed = True
 
     def is_usable(self, circ_id, session, cont):
         ''' Use **connect_to_destination_over_circuit** to determine if this
@@ -172,6 +217,10 @@ class DestinationList:
             conf.getfloat('general', 'http_timeout')
         self._usability_lock = RLock()
 
+    @property
+    def functional_destinations(self):
+        return [d for d in self._all_dests if d.is_functional]
+
     def _should_perform_usability_test(self):
         # Until bigger refactor, do not perform usability test.
         # Every time a measurement is done, it already performs what usability
@@ -259,4 +308,4 @@ class DestinationList:
         # This removes the need for an extra lock for every measurement.
         # Do not change the order of the destinations, just return a
         # destination.
-        return self._rng.choice(self._usable_dests)
+        return self._rng.choice(self.functional_destinations)
diff --git a/tests/integration/lib/test_destination.py b/tests/integration/lib/test_destination.py
index 53c0ab5..44a8631 100644
--- a/tests/integration/lib/test_destination.py
+++ b/tests/integration/lib/test_destination.py
@@ -1,4 +1,5 @@
 """Integration tests for destination.py"""
+from sbws.globals import MAXIMUM_NUMBER_DESTINATION_FAILURES
 import sbws.util.requests as requests_utils
 from sbws.lib.destination import (DestinationList, Destination,
                                   connect_to_destination_over_circuit)
@@ -39,6 +40,9 @@ def test_connect_to_destination_over_circuit_success(persistent_launch_tor,
         destination, circuit_id, session, persistent_launch_tor, 1024)
     assert is_usable is True
     assert 'content_length' in response
+    assert not destination.failed
+    assert destination.consecutive_failures == 0
+    assert destination.is_functional
 
 
 def test_connect_to_destination_over_circuit_fail(persistent_launch_tor,
@@ -59,3 +63,37 @@ def test_connect_to_destination_over_circuit_fail(persistent_launch_tor,
     is_usable, response = connect_to_destination_over_circuit(
         bad_destination, circuit_id, session, persistent_launch_tor, 1024)
     assert is_usable is False
+
+    # because it is the first time it fails, failures aren't count
+    assert bad_destination.failed
+    assert bad_destination.consecutive_failures == 0
+    assert bad_destination.is_functional
+
+    # fail twice in a row
+    is_usable, response = connect_to_destination_over_circuit(
+        bad_destination, circuit_id, session, persistent_launch_tor, 1024)
+    assert bad_destination.failed
+    assert bad_destination.consecutive_failures == 1
+    assert bad_destination.is_functional
+
+
+def test_functional_destinations(conf, cb, rl, persistent_launch_tor):
+    good_destination = Destination('https://127.0.0.1:28888', 1024, False)
+    # Mock that it failed before and just now, but it's still considered
+    # functional.
+    good_destination.consecutive_failures = 3
+    good_destination.failed = True
+    bad_destination = Destination('https://example.example', 1024, False)
+    # Mock that it didn't fail now, but it already failed 11 consecutive
+    # times.
+    bad_destination.consecutive_failures = \
+        MAXIMUM_NUMBER_DESTINATION_FAILURES + 1
+    bad_destination.failed = False
+    # None of the arguments are used, move to unit tests when this get
+    # refactored
+    destination_list = DestinationList(
+        conf, [good_destination, bad_destination], cb, rl,
+        persistent_launch_tor)
+    expected_functional_destinations = [good_destination]
+    functional_destinations = destination_list.functional_destinations
+    assert expected_functional_destinations == functional_destinations
diff --git a/tests/integration/lib/test_relayprioritizer.py b/tests/integration/lib/test_relayprioritizer.py
index 76c5969..60e3405 100644
--- a/tests/integration/lib/test_relayprioritizer.py
+++ b/tests/integration/lib/test_relayprioritizer.py
@@ -14,6 +14,7 @@ def static_time(value):
 def _build_result_for_relay(conf, rl, result_type, relay_nick,
                             timestamp):
     relay = [r for r in rl.relays if r.nickname == relay_nick]
+    print(rl.relays)
     assert len(relay) == 1
     relay = relay[0]
     other = [r for r in rl.relays if r.nickname != relay_nick][0]





More information about the tor-commits mailing list