commit 999834324b47f920473b5e28a11b1833d6543d7c Author: teor (Tim Wilson-Brown) teor2345@gmail.com Date: Mon Apr 11 14:32:57 2016 +1000
Simplify fallback list output
When creating the list of fallbacks for a release: * don't output fallback name and contact * sort fallbacks by fingerprint --- scripts/maint/updateFallbackDirs.py | 151 ++++++++++++++++++++++++++---------- 1 file changed, 111 insertions(+), 40 deletions(-)
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py index 44a7318..793ec7d 100755 --- a/scripts/maint/updateFallbackDirs.py +++ b/scripts/maint/updateFallbackDirs.py @@ -53,6 +53,9 @@ PERFORM_IPV4_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else True # Don't check ~1000 candidates when OUTPUT_CANDIDATES is True PERFORM_IPV6_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else False
+# Output fallback name, flags, and ContactInfo in a C comment? +OUTPUT_COMMENTS = True if OUTPUT_CANDIDATES else False + # Output matching ContactInfo in fallbacks list or the blacklist? # Useful if you're trying to contact operators CONTACT_COUNT = True if OUTPUT_CANDIDATES else False @@ -933,8 +936,9 @@ class Candidate(object): def is_running(self): return 'Running' in self._data['flags']
+ # report how long it takes to download a consensus from dirip:dirport @staticmethod - def fallback_consensus_dl_speed(dirip, dirport, nickname, max_time): + def fallback_consensus_download_speed(dirip, dirport, nickname, max_time): download_failed = False downloader = DescriptorDownloader() start = datetime.datetime.utcnow() @@ -970,47 +974,60 @@ class Candidate(object): dirip, dirport, max_time) return download_failed
- def fallback_consensus_dl_check(self): + # does this fallback download the consensus fast enough? + def check_fallback_download_consensus(self): # include the relay if we're not doing a check, or we can't check (IPv6) ipv4_failed = False ipv6_failed = False if PERFORM_IPV4_DIRPORT_CHECKS: - ipv4_failed = Candidate.fallback_consensus_dl_speed(self.dirip, + ipv4_failed = Candidate.fallback_consensus_download_speed(self.dirip, self.dirport, self._data['nickname'], CONSENSUS_DOWNLOAD_SPEED_MAX) if self.ipv6addr is not None and PERFORM_IPV6_DIRPORT_CHECKS: # Clients assume the IPv6 DirPort is the same as the IPv4 DirPort - ipv6_failed = Candidate.fallback_consensus_dl_speed(self.ipv6addr, - self.dirport, - self._data['nickname'], - CONSENSUS_DOWNLOAD_SPEED_MAX) - # Now retry the relay if it took too long the first time - if (PERFORM_IPV4_DIRPORT_CHECKS and ipv4_failed - and CONSENSUS_DOWNLOAD_RETRY): - ipv4_failed = Candidate.fallback_consensus_dl_speed(self.dirip, - self.dirport, - self._data['nickname'], - CONSENSUS_DOWNLOAD_SPEED_MAX) - if (self.ipv6addr is not None and PERFORM_IPV6_DIRPORT_CHECKS - and ipv6_failed and CONSENSUS_DOWNLOAD_RETRY): - ipv6_failed = Candidate.fallback_consensus_dl_speed(self.ipv6addr, + ipv6_failed = Candidate.fallback_consensus_download_speed(self.ipv6addr, self.dirport, self._data['nickname'], CONSENSUS_DOWNLOAD_SPEED_MAX) return ((not ipv4_failed) and (not ipv6_failed))
- def fallbackdir_line(self, dl_speed_ok, fallbacks, prefilter_fallbacks): + # if this fallback has not passed a download check, try it again, + # and record the result, available in get_fallback_download_consensus + def try_fallback_download_consensus(self): + if not self.get_fallback_download_consensus(): + self._data['download_check'] = self.check_fallback_download_consensus() + + # did this fallback pass the download check? + def get_fallback_download_consensus(self): + # if we're not performing checks, return True + if not PERFORM_IPV4_DIRPORT_CHECKS and not PERFORM_IPV6_DIRPORT_CHECKS: + return True + # if we are performing checks, but haven't done one, return False + if not self._data.has_key('download_check'): + return False + return self._data['download_check'] + + # output an optional header comment and info for this fallback + # try_fallback_download_consensus before calling this + def fallbackdir_line(self, fallbacks, prefilter_fallbacks): + s = '' + if OUTPUT_COMMENTS: + s += self.fallbackdir_comment(fallbacks, prefilter_fallbacks) + # if the download speed is ok, output a C string + # if it's not, but we OUTPUT_COMMENTS, output a commented-out C string + if self.get_fallback_download_consensus() or OUTPUT_COMMENTS: + s += self.fallbackdir_info(self.get_fallback_download_consensus()) + return s + + # output a header comment for this fallback + def fallbackdir_comment(self, fallbacks, prefilter_fallbacks): # /* # nickname # flags # [contact] # [identical contact counts] # */ - # "address:dirport orport=port id=fingerprint" - # "[ipv6=addr:orport]" - # "weight=FALLBACK_OUTPUT_WEIGHT", - # # Multiline C comment s = '/*' s += '\n' @@ -1040,9 +1057,26 @@ class Candidate(object): s += '\n' s += '*/' s += '\n' + + # output the fallback info C string for this fallback + # this is the text that would go after FallbackDir in a torrc + # if this relay failed the download test and we OUTPUT_COMMENTS, + # comment-out the returned string + def fallbackdir_info(self, 
dl_speed_ok): + # "address:dirport orport=port id=fingerprint" + # "[ipv6=addr:orport]" + # "weight=FALLBACK_OUTPUT_WEIGHT", + # + # Do we want a C string, or a commented-out string? + c_string = dl_speed_ok + comment_string = not dl_speed_ok and OUTPUT_COMMENTS + # If we don't want either kind of string, bail + if not c_string and not comment_string: + return '' + s = '' # Comment out the fallback directory entry if it's too slow # See the debug output for which address and port is failing - if not dl_speed_ok: + if comment_string: s += '/* Consensus download failed or was too slow:\n' # Multi-Line C string with trailing comma (part of a string list) # This makes it easier to diff the file, and remove IPv6 lines using grep @@ -1057,7 +1091,7 @@ class Candidate(object): cleanse_c_string(self.ipv6addr), cleanse_c_string(self.ipv6orport)) s += '\n' s += '" weight=%d",'%(FALLBACK_OUTPUT_WEIGHT) - if not dl_speed_ok: + if comment_string: s += '\n' s += '*/' return s @@ -1129,13 +1163,19 @@ class CandidateList(dict): # lowest to highest # used to find the median cw_to_bw_factor() def sort_fallbacks_by_cw_to_bw_factor(self): - self.fallbacks.sort(key=lambda x: self[x].cw_to_bw_factor()) + self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor())
# sort fallbacks by their measured bandwidth, highest to lowest # calculate_measured_bandwidth before calling this + # this is useful for reviewing candidates in priority order def sort_fallbacks_by_measured_bandwidth(self): - self.fallbacks.sort(key=lambda x: self[x].self._data['measured_bandwidth'], - reverse=True) + self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'], + reverse=True) + + # sort fallbacks by their fingerprint, lowest to highest + # this is useful for stable diffs of fallback lists + def sort_fallbacks_by_fingerprint(self): + self.fallbacks.sort(key=lambda f: f._fpr)
@staticmethod def load_relaylist(file_name): @@ -1301,6 +1341,39 @@ class CandidateList(dict): else: return None
+ # try a download check on each fallback candidate in order + # stop after max_count successful downloads + # but don't remove any candidates from the array + def try_download_consensus_checks(self, max_count): + dl_ok_count = 0 + for f in self.fallbacks: + f.try_fallback_download_consensus() + if f.get_fallback_download_consensus(): + # this fallback downloaded a consensus ok + dl_ok_count += 1 + if dl_ok_count >= max_count: + # we have enough fallbacks + return + + # put max_count successful candidates in the fallbacks array: + # - perform download checks on each fallback candidate + # - retry failed candidates if CONSENSUS_DOWNLOAD_RETRY is set + # - eliminate failed candidates + # - if there are more than max_count candidates, eliminate lowest bandwidth + # - if there are fewer than max_count candidates, leave only successful + def perform_download_consensus_checks(self, max_count): + self.sort_fallbacks_by_measured_bandwidth() + self.try_download_consensus_checks(max_count) + if CONSENSUS_DOWNLOAD_RETRY: + # try unsuccessful candidates again + # we could end up with more than max_count successful candidates here + self.try_download_consensus_checks(max_count) + # now we have at least max_count successful candidates, + # or we've tried them all + self.fallbacks = filter(lambda x: x.get_fallback_download_consensus(), + self.fallbacks) + self.fallbacks = self.fallbacks[:max_count] + def summarise_fallbacks(self, eligible_count, guard_count, target_count, max_count): # Report: @@ -1393,9 +1466,6 @@ def list_fallbacks(): # then remove low-bandwidth relays candidates.calculate_measured_bandwidth() candidates.remove_low_bandwidth_relays() - # make sure the list is sorted by bandwidth when we output it - # so that we include the active fallbacks with the greatest bandwidth - candidates.sort_fallbacks_by_measured_bandwidth()
# print the raw fallback list #for x in candidates.fallbacks: @@ -1412,17 +1482,18 @@ def list_fallbacks(): for s in fetch_source_list(): print describe_fetch_source(s)
- active_count = 0 + # check if each candidate can serve a consensus + candidates.perform_download_consensus_checks(max_count) + + # if we're outputting the final fallback list, sort by fingerprint + # this makes diffs much more stable + # otherwise, leave sorted by bandwidth, which allows operators to be + # contacted in priority order + if not OUTPUT_CANDIDATES: + candidates.sort_fallbacks_by_fingerprint() + for x in candidates.fallbacks: - dl_speed_ok = x.fallback_consensus_dl_check() - print x.fallbackdir_line(dl_speed_ok, candidates.fallbacks, - prefilter_fallbacks) - if dl_speed_ok: - # this fallback is included in the list - active_count += 1 - if active_count >= max_count: - # we have enough fallbacks - break + print x.fallbackdir_line(candidates.fallbacks, prefilter_fallbacks)
if __name__ == "__main__": list_fallbacks()
tor-commits@lists.torproject.org