[tor-bugs] #33033 [Core Tor/sbws]: sbws stuck thinking a destination is dead

Tor Bug Tracker & Wiki blackhole at torproject.org
Mon Jan 27 09:36:16 UTC 2020


#33033: sbws stuck thinking a destination is dead
---------------------------+-----------------------------------
 Reporter:  tom            |          Owner:  (none)
     Type:  defect         |         Status:  new
 Priority:  High           |      Milestone:  sbws: 1.1.x-final
Component:  Core Tor/sbws  |        Version:  sbws: 1.1.0
 Severity:  Normal         |     Resolution:
 Keywords:                 |  Actual Points:
Parent ID:                 |         Points:
 Reviewer:                 |        Sponsor:
---------------------------+-----------------------------------

Comment (by tom):

 Still in testing.

 {{{
 diff --git a/sbws/globals.py b/sbws/globals.py
 index e9fd8dd..b50384b 100644
 --- a/sbws/globals.py
 +++ b/sbws/globals.py
 @@ -130,6 +130,9 @@ NUM_DESTINATION_ATTEMPTS_STORED = 10
  # Because intermitent failures with CDN destinations, start trying again
  # after 5 min.
  DELTA_SECONDS_RETRY_DESTINATION = 60 * 5
 +# No matter what, do not increase the wait time between destination retries
 +# past this value.
 +MAX_SECONDS_BETWEEN_DESTINATION_RETRIES = 60 * 60 * 3
 # Number of consecutive times a destination can fail before considering it
  # not functional.
  MAX_NUM_DESTINATION_FAILURES = 3
 diff --git a/sbws/lib/destination.py b/sbws/lib/destination.py
 index 3ccd94f..a53b8ca 100644
 --- a/sbws/lib/destination.py
 +++ b/sbws/lib/destination.py
 @@ -11,6 +11,7 @@ import sbws.util.stem as stem_utils
  from ..globals import (
      MAX_NUM_DESTINATION_FAILURES,
      DELTA_SECONDS_RETRY_DESTINATION,
 +    MAX_SECONDS_BETWEEN_DESTINATION_RETRIES,
      NUM_DESTINATION_ATTEMPTS_STORED,
      FACTOR_INCREMENT_DESTINATION_RETRY
      )
 @@ -121,7 +122,7 @@ def connect_to_destination_over_circuit(dest, circ_id, session, cont, max_dl):
              '{} not {}'.format(requests.codes.ok, head.status_code)
      if 'content-length' not in head.headers:
          dest.add_failure()
 -        return False, error_prefix + 'we except the header Content-Length '\
 +        return False, error_prefix + 'we expect the header Content-Length '\
              'to exist in the response'
      content_length = int(head.headers['content-length'])
      if max_dl > content_length:
 @@ -143,6 +144,7 @@ class Destination:
      def __init__(self, url, max_dl, verify,
                   max_num_failures=MAX_NUM_DESTINATION_FAILURES,
                   delta_seconds_retry=DELTA_SECONDS_RETRY_DESTINATION,
 +                 max_seconds_between_retries=MAX_SECONDS_BETWEEN_DESTINATION_RETRIES,
                   num_attempts_stored=NUM_DESTINATION_ATTEMPTS_STORED,
                   factor_increment_retry=FACTOR_INCREMENT_DESTINATION_RETRY):
          """Initalizes the Web server from which the data is downloaded.
 @@ -169,6 +171,8 @@ class Destination:
          # Default delta time to try a destination that was not functional.
          self._default_delta_seconds_retry = delta_seconds_retry
          self._delta_seconds_retry = delta_seconds_retry
 +        # A cap on the time to wait between destination retries.
 +        self._max_seconds_between_retries = max_seconds_between_retries
          # Using a deque (FIFO) to do not grow forever and
          # to do not have to remove old attempts.
          # Store tuples of timestamp and whether the destination succed or not
 @@ -201,20 +205,30 @@ class Destination:
          Increment the time a destination will be tried again by a ``factor``.
          """
          self._delta_seconds_retry *= factor or self._factor
 -        log.info("Incremented the time to try destination %s to %s
 hours.",
 -                 self.url, self._delta_seconds_retry / 60 / 60)
 +        if self._delta_seconds_retry > self._max_seconds_between_retries:
 +            self._delta_seconds_retry = self._max_seconds_between_retries
 +            log.info("Incremented the time to try destination %s past the
 "
 +                     "limit, capping it at %s hours.",
 +                     self.url, self._delta_seconds_retry / 60 / 60)
 +        else:
 +            log.info("Incremented the time to try destination %s to %s
 hours.",
 +                     self.url, self._delta_seconds_retry / 60 / 60)

 -    def _is_last_try_old_enough(self, n=None):
 +    def _get_last_try_in_seconds_ago(self):
          """
 -        Return True if the last time it was used it was ``n`` seconds ago.
 +        Return the delta between the last try and now, as positive seconds.
          """
          # Timestamp of the last attempt.
          last_time = self._attempts[-1][0]
 +        return (datetime.datetime.utcnow() - last_time).total_seconds()
 +
 +    def _is_last_try_old_enough(self, n=None):
 +        """
 +        Return True if the last time it was used was ``n`` seconds ago.
 +        """
          # If the last attempt is older than _delta_seconds_retry, try again
 -        return (datetime.datetime.utcnow()
 -                - datetime.timedelta(seconds=self._delta_seconds_retry)
 -                > last_time)
 -        return False
 +        return (self._get_last_try_in_seconds_ago() >
 +                self._delta_seconds_retry)

      def is_functional(self):
          """Whether connections to a destination are failing or not.
 @@ -238,18 +252,21 @@ class Destination:
          if self._are_last_attempts_failures():
              # The log here will appear in all the the queued
              #  relays and threads.
 -            log.warning("The last %s times the destination %s failed."
 -                        "Disabled for %s minutes.",
 +            log.warning("The last %s times the destination %s failed. "
 +                        "It last ran %s seconds ago. "
 +                        "Disabled for %s seconds.",
                          self._max_num_failures, self.url,
 -                        self._delta_seconds_retry / 60)
 +                        self._get_last_try_in_seconds_ago(),
 +                        self._delta_seconds_retry)
              log.warning("Please, add more destinations or increment the "
                          "number of maximum number of consecutive failures
 "
                          "in the configuration.")
              # It was not used for a while and the last time it was used
              # was long ago, then try again
              if self._is_last_try_old_enough():
 -                log.info("The destination %s was not tried for %s hours,
 "
 -                         "it is going to by tried again.")
 +                log.info("The destination %s was not tried for %s
 seconds, "
 +                         "it is going to by tried again.", self.url,
 +                         self._get_last_try_in_seconds_ago())
                 # Set the next time to retry higher, in case this attempt fails
                  self._increment_time_to_retry()
                  return True

 }}}
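
 For reference, here is a minimal standalone sketch (not sbws itself) of
 the behavior the patch introduces: each consecutive failure still
 multiplies the retry delay by FACTOR_INCREMENT_DESTINATION_RETRY, but
 the delay is now clamped at MAX_SECONDS_BETWEEN_DESTINATION_RETRIES
 (3 hours), so a destination that comes back is retried within at most
 3 hours. The factor of 2 below is an assumed value; the diff does not
 show what FACTOR_INCREMENT_DESTINATION_RETRY is set to.

 {{{
 # Sketch of the capped retry back-off; the constants mirror the patch,
 # FACTOR = 2 is an assumption.
 import datetime

 DELTA_SECONDS_RETRY_DESTINATION = 60 * 5               # first retry: 5 min
 MAX_SECONDS_BETWEEN_DESTINATION_RETRIES = 60 * 60 * 3  # hard cap: 3 hours
 FACTOR = 2

 def increment_time_to_retry(delta_seconds, factor=FACTOR):
     # Grow the delay, but never past the cap (as in the patched
     # _increment_time_to_retry).
     return min(delta_seconds * factor,
                MAX_SECONDS_BETWEEN_DESTINATION_RETRIES)

 def seconds_since(last_try):
     # Positive seconds since the last attempt, as in the new
     # _get_last_try_in_seconds_ago().
     return (datetime.datetime.utcnow() - last_try).total_seconds()

 # Consecutive failures double the delay until the cap is reached:
 delta = DELTA_SECONDS_RETRY_DESTINATION
 for attempt in range(1, 9):
     delta = increment_time_to_retry(delta)
     print("after failure %d, retry in %.2f hours" % (attempt, delta / 3600))
 # -> 0.17, 0.33, 0.67, 1.33, 2.67, 3.00, 3.00, 3.00

 # A destination last tried 4 hours ago is old enough to retry again,
 # mirroring _is_last_try_old_enough():
 last_try = datetime.datetime.utcnow() - datetime.timedelta(hours=4)
 print("old enough to retry: %s" % (seconds_since(last_try) > delta))
 }}}

 Without the cap, the delay kept multiplying on every consecutive
 failure, which is how sbws could get stuck thinking a destination is
 dead long after it recovered.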

--
Ticket URL: <https://trac.torproject.org/projects/tor/ticket/33033#comment:4>
Tor Bug Tracker & Wiki <https://trac.torproject.org/>
The Tor Project: anonymity online

