commit 654367f0260ff144898dc0ac2a45b710e7350f29 Author: teor <teor2345@gmail.com> Date: Sat Dec 10 21:35:23 2016 +1100
Allow fallbacks serving consensuses that expired less than 24 hours ago
This works around #20909, where relays serve stale consensuses for a short time, and then recover.
Update to the fix for #20539. --- changes/fallbacks-201612 | 3 ++- scripts/maint/updateFallbackDirs.py | 35 +++++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 9 deletions(-)
diff --git a/changes/fallbacks-201612 b/changes/fallbacks-201612 index 5bb7bdd..03d9843 100644 --- a/changes/fallbacks-201612 +++ b/changes/fallbacks-201612 @@ -13,7 +13,8 @@ - Make it easier to change the output sort order of fallbacks. Closes ticket 20822. - Exclude relays affected by 20499 from the fallback list. Exclude known - affected versions, and any relay that delivers a stale consensus. + affected versions, and any relay that delivers a stale consensus, as + long as that consensus expired more than 24 hours ago. Closes ticket 20539. - Require fallbacks to have flags for 90% of the time (weighted decaying average), rather than 95%. This allows at least 73% of clients to diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py index 41d210f..4f60fd4 100755 --- a/scripts/maint/updateFallbackDirs.py +++ b/scripts/maint/updateFallbackDirs.py @@ -88,6 +88,19 @@ MUST_BE_RUNNING_NOW = (PERFORM_IPV4_DIRPORT_CHECKS # Clients have been using microdesc consensuses by default for a while now DOWNLOAD_MICRODESC_CONSENSUS = True
+# If a relay delivers a consensus that expired less than this many seconds +# ago, we still allow the relay. This should never be less than -90*60 (that +# is, -90 minutes expressed in seconds), as all directory mirrors should have +# downloaded a consensus 90 minutes before it expires. It should never be more than 24 hours, because clients +# reject consensuses that are older than REASONABLY_LIVE_TIME. +# For the consensus expiry check to be accurate, the machine running this +# script needs an accurate clock. +# We use 24 hours to compensate for #20909, where relays on 0.2.9.5-alpha and +# 0.3.0.0-alpha-dev and later deliver stale consensuses, but typically recover +# after ~12 hours. +# We should make this lower when #20909 is fixed, see #20942. +CONSENSUS_EXPIRY_TOLERANCE = 24*60*60 + # Output fallback name, flags, bandwidth, and ContactInfo in a C comment? OUTPUT_COMMENTS = True if OUTPUT_CANDIDATES else False
@@ -1146,7 +1159,6 @@ class Candidate(object): def fallback_consensus_download_speed(dirip, dirport, nickname, fingerprint, max_time): download_failed = False - start = datetime.datetime.utcnow() # some directory mirrors respond to requests in ways that hang python # sockets, which is why we log this line here logging.info('Initiating %sconsensus download from %s (%s:%d) %s.', @@ -1155,6 +1167,7 @@ class Candidate(object): # there appears to be about 1 second of overhead when comparing stem's # internal trace time and the elapsed time calculated here TIMEOUT_SLOP = 1.0 + start = datetime.datetime.utcnow() try: consensus = get_consensus( endpoints = [(dirip, dirport)], @@ -1165,26 +1178,32 @@ class Candidate(object): document_handler = DocumentHandler.BARE_DOCUMENT, microdescriptor = DOWNLOAD_MICRODESC_CONSENSUS ).run()[0] + end = datetime.datetime.utcnow() + time_since_expiry = (end - consensus.valid_until).total_seconds() except Exception, stem_error: + end = datetime.datetime.utcnow() logging.info('Unable to retrieve a consensus from %s: %s', nickname, stem_error) status = 'error: "%s"' % (stem_error) level = logging.WARNING download_failed = True - elapsed = (datetime.datetime.utcnow() - start).total_seconds() + elapsed = (end - start).total_seconds() if download_failed: - # keep the error failure status + # keep the error failure status, and avoid using the variables pass elif elapsed > max_time: status = 'too slow' level = logging.WARNING download_failed = True - elif datetime.datetime.utcnow() > consensus.valid_until: - time_since_expiry = (datetime.datetime.utcnow() - - consensus.valid_until).total_seconds() + elif (time_since_expiry > 0): status = 'outdated consensus, expired %ds ago'%(int(time_since_expiry)) - level = logging.WARNING - download_failed = True + if time_since_expiry <= CONSENSUS_EXPIRY_TOLERANCE: + status += ', tolerating up to %ds'%(CONSENSUS_EXPIRY_TOLERANCE) + level = logging.INFO + else: + status += ', invalid' + level = logging.WARNING 
+ download_failed = True else: status = 'ok' level = logging.DEBUG
tor-commits@lists.torproject.org