[tor-commits] [tor/release-0.2.8] Limit fallbacks from the same operator

nickm at torproject.org nickm at torproject.org
Tue Apr 26 23:28:11 UTC 2016


commit c157a31ee8bd84587e6e61b674b33c792154d74a
Author: teor (Tim Wilson-Brown) <teor2345 at gmail.com>
Date:   Thu Apr 14 18:40:06 2016 +1000

    Limit fallbacks from the same operator
    
    Use IP address, effective family, and contact info to
    discover and limit fallbacks to one per operator.
    
    Also analyse netblock, ports, IP version, and Exit flag,
    and print the results. Don't exclude any fallbacks from
    the list because of netblocks, ports, IP version, or
    Exit flag.
---
 scripts/maint/updateFallbackDirs.py | 550 +++++++++++++++++++++++++++++++++---
 1 file changed, 510 insertions(+), 40 deletions(-)

diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
index 793ec7d..4cfee5d 100755
--- a/scripts/maint/updateFallbackDirs.py
+++ b/scripts/maint/updateFallbackDirs.py
@@ -2,9 +2,12 @@
 
 # Usage: scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc
 # Needs stem available in your PYTHONPATH, or just ln -s ../stem/stem .
+# Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package)
+# for netblock analysis, in PYTHONPATH, or just
+# ln -s ../py2-ipaddress-3.4.1/ipaddress.py .
 #
-# Then read the generated list to ensure no-one slipped anything funny into
-# their name or contactinfo
+# Then read the logs to make sure the fallbacks aren't dominated by a single
+# netblock or port
 
 # Script by weasel, April 2015
 # Portions by gsathya & karsten, 2013
@@ -34,6 +37,21 @@ import logging
 # INFO tells you why each relay was included or excluded
 # WARN tells you about potential misconfigurations
 logging.basicConfig(level=logging.WARNING)
+logging.root.name = ''
+# INFO tells you about each consensus download attempt
+logging.getLogger('stem').setLevel(logging.WARNING)
+
+HAVE_IPADDRESS = False
+try:
+  # python 3 builtin, or install package py2-ipaddress
+  # there are several ipaddress implementations for python 2
+  # with slightly different semantics with str typed text
+  # fortunately, all our IP addresses are in unicode
+  import ipaddress
+  HAVE_IPADDRESS = True
+except ImportError:
+  # if this happens, we avoid doing netblock analysis
+  logging.warning('Unable to import ipaddress, please install py2-ipaddress')
 
 ## Top-Level Configuration
 
@@ -468,6 +486,9 @@ class Candidate(object):
       # relays without advertised bandwdith have it calculated from their
       # consensus weight
       details['advertised_bandwidth'] = 0
+    if (not 'effective_family' in details
+        or details['effective_family'] is None):
+      details['effective_family'] = []
     details['last_changed_address_or_port'] = parse_ts(
                                       details['last_changed_address_or_port'])
     self._data = details
@@ -480,7 +501,7 @@ class Candidate(object):
     if self.orport is None:
       raise Exception("Failed to get an orport for %s."%(self._fpr,))
     self._compute_ipv6addr()
-    if self.ipv6addr is None:
+    if not self.has_ipv6():
       logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
 
   def _stable_sort_or_addresses(self):
@@ -584,14 +605,14 @@ class Candidate(object):
       (ipaddr, port) = i.rsplit(':', 1)
       if (port == self.orport) and Candidate.is_valid_ipv6_address(ipaddr):
         self.ipv6addr = ipaddr
-        self.ipv6orport = port
+        self.ipv6orport = int(port)
         return
     # Choose the first IPv6 address in the list
     for i in self._data['or_addresses']:
       (ipaddr, port) = i.rsplit(':', 1)
       if Candidate.is_valid_ipv6_address(ipaddr):
         self.ipv6addr = ipaddr
-        self.ipv6orport = port
+        self.ipv6orport = int(port)
         return
 
   @staticmethod
@@ -804,9 +825,10 @@ class Candidate(object):
                      'ORPort (%d) does not match entry ORPort (%d)',
                      self._fpr, self.orport, int(entry['orport']))
         continue
-      has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None
-      if (entry.has_key('ipv6') and has_ipv6):
-        ipv6 = self.ipv6addr + ':' + self.ipv6orport
+      ipv6 = None
+      if self.has_ipv6():
+        ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
+      if entry.has_key('ipv6') and self.has_ipv6():
         # if both entry and fallback have an ipv6 address, compare them
         if entry['ipv6'] != ipv6:
           logging.info('%s is not in the whitelist: fingerprint matches, ' +
@@ -815,14 +837,14 @@ class Candidate(object):
           continue
       # if the fallback has an IPv6 address but the whitelist entry
       # doesn't, or vice versa, the whitelist entry doesn't match
-      elif entry.has_key('ipv6') and not has_ipv6:
+      elif entry.has_key('ipv6') and not self.has_ipv6():
         logging.info('%s is not in the whitelist: fingerprint matches, but ' +
                      'it has no IPv6, and entry has IPv6 (%s)', self._fpr,
                      entry['ipv6'])
         logging.warning('%s excluded: has it lost its former IPv6 address %s?',
                         self._fpr, entry['ipv6'])
         continue
-      elif not entry.has_key('ipv6') and has_ipv6:
+      elif not entry.has_key('ipv6') and self.has_ipv6():
         logging.info('%s is not in the whitelist: fingerprint matches, but ' +
                      'it has IPv6 (%s), and entry has no IPv6', self._fpr,
                      ipv6)
@@ -871,9 +893,10 @@ class Candidate(object):
                          'entry has no DirPort or ORPort', self._fpr,
                          self.dirip)
             return True
-        has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None
-        ipv6 = (self.ipv6addr + ':' + self.ipv6orport) if has_ipv6 else None
-        if (key == 'ipv6' and has_ipv6):
+        ipv6 = None
+        if self.has_ipv6():
+          ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
+        if (key == 'ipv6' and self.has_ipv6()):
         # if both entry and fallback have an ipv6 address, compare them,
         # otherwise, disregard ipv6 addresses
           if value == ipv6:
@@ -889,18 +912,18 @@ class Candidate(object):
               logging.info('%s is in the blacklist: IPv6 (%s) matches, and' +
                            'entry has no DirPort', self._fpr, ipv6)
               return True
-        elif (key == 'ipv6' or has_ipv6):
+        elif (key == 'ipv6' or self.has_ipv6()):
           # only log if the fingerprint matches but the IPv6 doesn't
           if entry.has_key('id') and entry['id'] == self._fpr:
             logging.info('%s skipping IPv6 blacklist comparison: relay ' +
                          'has%s IPv6%s, but entry has%s IPv6%s', self._fpr,
-                         '' if has_ipv6 else ' no',
-                         (' (' + ipv6 + ')') if has_ipv6 else  '',
+                         '' if self.has_ipv6() else ' no',
+                         (' (' + ipv6 + ')') if self.has_ipv6() else  '',
                          '' if key == 'ipv6' else ' no',
                          (' (' + value + ')') if key == 'ipv6' else '')
             logging.warning('Has %s %s IPv6 address %s?', self._fpr,
-                            'gained an' if has_ipv6 else 'lost its former',
-                            ipv6 if has_ipv6 else value)
+                        'gained an' if self.has_ipv6() else 'lost its former',
+                        ipv6 if self.has_ipv6() else value)
     return False
 
   def cw_to_bw_factor(self):
@@ -936,6 +959,101 @@ class Candidate(object):
   def is_running(self):
     return 'Running' in self._data['flags']
 
+  # True when both an IPv6 address and an IPv6 ORPort are known
+  def has_ipv6(self):
+    return not (self.ipv6addr is None or self.ipv6orport is None)
+
+  # remove one pair of enclosing square brackets from an IPv6 address
+  # IPv4 addresses and unbracketed IPv6 addresses pass through unchanged
+  # the result is always unicode, and None becomes the empty string
+  @staticmethod
+  def strip_ipv6_brackets(ip):
+    if ip is None:
+      # None is reported as the empty string
+      ip = ''
+    elif len(ip) >= 2 and ip[0] == '[' and ip[-1] == ']':
+      # only strip when both the opening and closing bracket are present
+      ip = ip[1:-1]
+    return unicode(ip)
+
+  # do ip_a and ip_b fall inside the same netblock of mask_bits bits?
+  # returns False if either address is None; accepts IPv4 and IPv6
+  # raises an Exception if the two addresses have different IP versions
+  # a mask outside 0..max_prefixlen is logged as a warning and clamped
+  @staticmethod
+  def netblocks_equal(ip_a, ip_b, mask_bits):
+    if ip_a is None or ip_b is None:
+      return False
+    text_a = Candidate.strip_ipv6_brackets(ip_a)
+    text_b = Candidate.strip_ipv6_brackets(ip_b)
+    addr_a = ipaddress.ip_address(text_a)
+    addr_b = ipaddress.ip_address(text_b)
+    if addr_a.version != addr_b.version:
+      raise Exception('Mismatching IP versions in %s and %s'%(text_a, text_b))
+    bad_mask = 'Bad IP mask %d for %s and %s'
+    if mask_bits > addr_a.max_prefixlen:
+      logging.warning(bad_mask%(mask_bits, text_a, text_b))
+      mask_bits = addr_a.max_prefixlen
+    if mask_bits < 0:
+      logging.warning(bad_mask%(mask_bits, text_a, text_b))
+      mask_bits = 0
+    block = ipaddress.ip_network('%s/%d'%(text_a, mask_bits), strict=False)
+    return addr_b in block
+
+  # compare the /mask_bits IPv4 netblocks of this fallback and other,
+  # using each fallback's dirip
+  def ipv4_netblocks_equal(self, other, mask_bits):
+    mine, theirs = self.dirip, other.dirip
+    return Candidate.netblocks_equal(mine, theirs, mask_bits)
+
+  # compare the /mask_bits IPv6 netblocks of this fallback and other,
+  # using each fallback's ipv6addr
+  # a fallback without IPv6 never shares an IPv6 netblock: return False
+  def ipv6_netblocks_equal(self, other, mask_bits):
+    both_ipv6 = self.has_ipv6() and other.has_ipv6()
+    if not both_ipv6:
+      return False
+    return Candidate.netblocks_equal(self.ipv6addr, other.ipv6addr, mask_bits)
+
+  # do this fallback and other advertise the same IPv4 DirPort?
+  def dirport_equal(self, other):
+    return other.dirport == self.dirport
+
+  # do this fallback and other advertise the same IPv4 ORPort?
+  def ipv4_orport_equal(self, other):
+    return other.orport == self.orport
+
+  # do this fallback and other use the same IPv6 ORPort?
+  # always False when either fallback lacks an IPv6 address
+  def ipv6_orport_equal(self, other):
+    if self.has_ipv6() and other.has_ipv6():
+      return self.ipv6orport == other.ipv6orport
+    return False
+
+  # True if any like-for-like port matches between this fallback and other:
+  # DirPort, IPv4 ORPort, or (when both have IPv6) IPv6 ORPort
+  def port_equal(self, other):
+    if self.dirport_equal(other) or self.ipv4_orport_equal(other):
+      return True
+    return self.ipv6_orport_equal(other)
+
+  # return [DirPort, IPv4 ORPort], plus the IPv6 ORPort if present and distinct
+  def port_list(self):
+    result = [self.dirport, self.orport]
+    if self.has_ipv6() and self.ipv6orport not in result:
+      result += [self.ipv6orport]
+    return result
+
+  # does any port number used by this fallback also appear on other?
+  # port types are ignored
+  # For example, if self's IPv4 ORPort is 80 and other's DirPort is 80,
+  # return True
+  def port_shared(self, other):
+    mine = self.port_list()
+    theirs = other.port_list()
+    # any() stops at the first shared port, like an early return would
+    return any(port in theirs for port in mine)
+
   # report how long it takes to download a consensus from dirip:dirport
   @staticmethod
   def fallback_consensus_download_speed(dirip, dirport, nickname, max_time):
@@ -984,7 +1102,7 @@ class Candidate(object):
                                                 self.dirport,
                                                 self._data['nickname'],
                                                 CONSENSUS_DOWNLOAD_SPEED_MAX)
-    if self.ipv6addr is not None and PERFORM_IPV6_DIRPORT_CHECKS:
+    if self.has_ipv6() and PERFORM_IPV6_DIRPORT_CHECKS:
       # Clients assume the IPv6 DirPort is the same as the IPv4 DirPort
       ipv6_failed = Candidate.fallback_consensus_download_speed(self.ipv6addr,
                                                 self.dirport,
@@ -1086,9 +1204,8 @@ class Candidate(object):
             self.orport,
             cleanse_c_string(self._fpr))
     s += '\n'
-    if self.ipv6addr is not None:
-      s += '" ipv6=%s:%s"'%(
-            cleanse_c_string(self.ipv6addr), cleanse_c_string(self.ipv6orport))
+    if self.has_ipv6():
+      s += '" ipv6=%s:%d"'%(cleanse_c_string(self.ipv6addr), self.ipv6orport)
       s += '\n'
     s += '" weight=%d",'%(FALLBACK_OUTPUT_WEIGHT)
     if comment_string:
@@ -1126,7 +1243,7 @@ class CandidateList(dict):
     d = fetch('details',
         fields=('fingerprint,nickname,contact,last_changed_address_or_port,' +
                 'consensus_weight,advertised_bandwidth,or_addresses,' +
-                'dir_address,recommended_version,flags'))
+                'dir_address,recommended_version,flags,effective_family'))
     logging.debug('Loading details document done.')
 
     if not 'relays' in d: raise Exception("No relays found in document.")
@@ -1163,19 +1280,19 @@ class CandidateList(dict):
   # lowest to highest
   # used to find the median cw_to_bw_factor()
   def sort_fallbacks_by_cw_to_bw_factor(self):
-    self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor(), self.fallbacks)
+    self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor())
 
   # sort fallbacks by their measured bandwidth, highest to lowest
   # calculate_measured_bandwidth before calling this
   # this is useful for reviewing candidates in priority order
   def sort_fallbacks_by_measured_bandwidth(self):
     self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'],
-                        self.fallbacks, reverse=True)
+                        reverse=True)
 
   # sort fallbacks by their fingerprint, lowest to highest
   # this is useful for stable diffs of fallback lists
   def sort_fallbacks_by_fingerprint(self):
-    self.fallbacks.sort(key=lambda f: self[f]._fpr, self.fallbacks)
+    self.fallbacks.sort(key=lambda f: f._fpr)
 
   @staticmethod
   def load_relaylist(file_name):
@@ -1341,6 +1458,91 @@ class CandidateList(dict):
     else:
       return None
 
+  # may a fallback with this attribute be added, given exclusion_list?
+  # a missing attribute (None or the empty string) is always allowed
+  # otherwise, the attribute is allowed only if exclusion_list does not
+  # already contain it
+  @staticmethod
+  def allow(attribute, exclusion_list):
+    # missing attributes are never compared against exclusion_list
+    missing = attribute is None or attribute == ''
+    if missing:
+      return True
+    # otherwise, membership in exclusion_list decides
+    return attribute not in exclusion_list
+
+  # keep at most one fallback per IPv4 address, and one per IPv6 address
+  # each fallback has exactly one IPv4 address: the IPv4 DirPort address
+  # (we choose the IPv4 ORPort which is on the same IPv4 as the DirPort)
+  # each fallback has at most one IPv6 address: the IPv6 ORPort address
+  # we try to match the IPv4 ORPort, but will use any IPv6 address if needed
+  # (clients assume the IPv6 DirPort is the same as the IPv4 DirPort, but
+  # typically only use the IPv6 ORPort)
+  # fallbacks without an IPv6 address are only checked on IPv4
+  # return the number of candidates we excluded
+  def limit_fallbacks_same_ip(self):
+    kept, seen_ips = [], []
+    for f in self.fallbacks:
+      ipv4_ok = CandidateList.allow(f.dirip, seen_ips)
+      ipv6_ok = CandidateList.allow(f.ipv6addr, seen_ips)
+      if ipv4_ok and ipv6_ok:
+        kept.append(f)
+        seen_ips.append(f.dirip)
+        if f.has_ipv6():
+          seen_ips.append(f.ipv6addr)
+      elif not ipv4_ok:
+        logging.debug('Eliminated %s: already have fallback on IPv4 %s'%(
+                                                          f._fpr, f.dirip))
+      elif f.has_ipv6():
+        logging.debug('Eliminated %s: already have fallback on IPv6 %s'%(
+                                                          f._fpr, f.ipv6addr))
+    removed = len(self.fallbacks) - len(kept)
+    self.fallbacks = kept
+    return removed
+
+  # keep at most one fallback per ContactInfo
+  # fallbacks with no ContactInfo are always kept
+  # this check can be gamed by providing no ContactInfo, or by setting the
+  # ContactInfo to match another fallback
+  # however, relays with the same ContactInfo are likely to go down at
+  # similar times, so its usefulness outweighs the risk
+  def limit_fallbacks_same_contact(self):
+    kept, seen_contacts = [], []
+    for f in self.fallbacks:
+      contact = f._data['contact']
+      if not CandidateList.allow(contact, seen_contacts):
+        logging.debug(('Eliminated %s: already have fallback on ' +
+                       'ContactInfo %s')%(f._fpr, contact))
+        continue
+      kept.append(f)
+      seen_contacts.append(contact)
+    removed = len(self.fallbacks) - len(kept)
+    self.fallbacks = kept
+    return removed
+
+  # make sure there is only one fallback per effective family
+  # if there is no family, allow the fallback
+  # this check can't be gamed, because we use effective family, which ensures
+  # mutual family declarations
+  # with indirect families, the result depends on the fallback sort order
+  # return the number of candidates we excluded
+  def limit_fallbacks_same_family(self):
+    family_limit_fallbacks = []
+    fingerprint_list = []
+    for f in self.fallbacks:
+      if CandidateList.allow(f._fpr, fingerprint_list):
+        family_limit_fallbacks.append(f)
+        fingerprint_list.append(f._fpr)
+        fingerprint_list.extend(f._data['effective_family'])
+      else:
+        # we already kept a fallback that has this one in its effective family
+        # the message is parenthesised so % applies to the whole string
+        logging.debug(('Eliminated %s: already have fallback in effective ' +
+                       'family')%(f._fpr))
+    original_count = len(self.fallbacks)
+    self.fallbacks = family_limit_fallbacks
+    return original_count - len(self.fallbacks)
+
   # try a download check on each fallback candidate in order
   # stop after max_count successful downloads
   # but don't remove any candidates from the array
@@ -1361,6 +1563,7 @@ class CandidateList(dict):
   # - eliminate failed candidates
   # - if there are more than max_count candidates, eliminate lowest bandwidth
   # - if there are fewer than max_count candidates, leave only successful
+  # Return the number of fallbacks that failed the consensus check
   def perform_download_consensus_checks(self, max_count):
     self.sort_fallbacks_by_measured_bandwidth()
     self.try_download_consensus_checks(max_count)
@@ -1370,12 +1573,245 @@ class CandidateList(dict):
       self.try_download_consensus_checks(max_count)
     # now we have at least max_count successful candidates,
     # or we've tried them all
+    original_count = len(self.fallbacks)
     self.fallbacks = filter(lambda x: x.get_fallback_download_consensus(),
                             self.fallbacks)
+    # some of these failed the check, others skipped the check,
+    # if we already had enough successful downloads
+    failed_count = original_count - len(self.fallbacks)
     self.fallbacks = self.fallbacks[:max_count]
+    return failed_count
+
+  # return a string describing a/b as a percentage (0% when b is zero)
+  @staticmethod
+  def describe_percentage(a, b):
+    return '%d/%d = %.0f%%'%(a, b, (a*100.0)/b if b != 0 else 0.0)
+
+  # group the fallbacks by IPv4 netblock, where mask_bits is the size of
+  # the netblock
+  # return a dictionary of lists of fallbacks
+  # each key is the fingerprint of the first fallback placed in that
+  # netblock, which is looked up in self to compare later fallbacks
+  def fallbacks_by_ipv4_netblock(self, mask_bits):
+    by_netblock = {}
+    for f in self.fallbacks:
+      # compare against one representative per existing netblock
+      for key_fpr in by_netblock.keys():
+        if f.ipv4_netblocks_equal(self[key_fpr], mask_bits):
+          # join the first existing netblock that contains this fallback
+          by_netblock[key_fpr].append(f)
+          break
+      else:
+        # the inner loop finished without a break, so no netblock matched:
+        # start a new one, keyed by this fallback's fingerprint
+        by_netblock[f._fpr] = [f]
+    return by_netblock
+
+  # group the IPv6 fallbacks by IPv6 netblock, where mask_bits is the size
+  # of the netblock
+  # return a dictionary of lists of fallbacks, keyed by the fingerprint of
+  # the first fallback placed in each netblock
+  def fallbacks_by_ipv6_netblock(self, mask_bits):
+    by_netblock = {}
+    for f in self.fallbacks:
+      if not f.has_ipv6():
+        # fallbacks without IPv6 addresses are left out of the result
+        continue
+      for key_fpr in by_netblock.keys():
+        if f.ipv6_netblocks_equal(self[key_fpr], mask_bits):
+          # join the first existing netblock that contains this fallback
+          by_netblock[key_fpr].append(f)
+          break
+      else:
+        # the inner loop finished without a break, so no netblock matched:
+        # start a new one, keyed by this fallback's fingerprint
+        by_netblock[f._fpr] = [f]
+    return by_netblock
+
+  # warn about the largest shared IPv4 /mask_bits netblock, and about the
+  # proportion of fallbacks that share any IPv4 /mask_bits netblock
+  def describe_fallback_ipv4_netblock_mask(self, mask_bits):
+    fallback_count = len(self.fallbacks)
+    shared_netblock_fallback_count = 0
+    most_frequent_netblock = None
+    netblocks = self.fallbacks_by_ipv4_netblock(mask_bits)
+    for b in netblocks.keys():
+      if len(netblocks[b]) > 1:
+        # count every fallback that shares its netblock with another fallback
+        shared_netblock_fallback_count += len(netblocks[b])
+        # track the largest shared netblock (ties keep the first one found)
+        if (most_frequent_netblock is None
+            or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
+          most_frequent_netblock = b
+        logging.debug('Fallback IPv4 addresses in the same /%d:'%(mask_bits))
+        for f in netblocks[b]:
+          logging.debug('%s - %s', f.dirip, f._fpr)
+    if most_frequent_netblock is not None:
+      logging.warning('There are %s fallbacks in the IPv4 /%d containing %s'%(
+                                    CandidateList.describe_percentage(
+                                      len(netblocks[most_frequent_netblock]),
+                                      fallback_count),
+                                    mask_bits,
+                                    self[most_frequent_netblock].dirip))
+    if shared_netblock_fallback_count > 0:
+      logging.warning(('%s of fallbacks are in an IPv4 /%d with other ' +
+                       'fallbacks')%(CandidateList.describe_percentage(
+                                                shared_netblock_fallback_count,
+                                                fallback_count),
+                                     mask_bits))
+
+  # warn about the largest shared IPv6 /mask_bits netblock, and about the
+  # proportion of IPv6 fallbacks that share any IPv6 /mask_bits netblock
+  def describe_fallback_ipv6_netblock_mask(self, mask_bits):
+    fallback_count = len(self.fallbacks_with_ipv6())
+    shared_netblock_fallback_count = 0
+    most_frequent_netblock = None
+    netblocks = self.fallbacks_by_ipv6_netblock(mask_bits)
+    for b in netblocks.keys():
+      if len(netblocks[b]) > 1:
+        # count every fallback that shares its netblock with another fallback
+        shared_netblock_fallback_count += len(netblocks[b])
+        # track the largest shared netblock (ties keep the first one found)
+        if (most_frequent_netblock is None
+            or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
+          most_frequent_netblock = b
+        logging.debug('Fallback IPv6 addresses in the same /%d:'%(mask_bits))
+        for f in netblocks[b]:
+          logging.debug('%s - %s', f.ipv6addr, f._fpr)
+    if most_frequent_netblock is not None:
+      logging.warning('There are %s fallbacks in the IPv6 /%d containing %s'%(
+                                    CandidateList.describe_percentage(
+                                      len(netblocks[most_frequent_netblock]),
+                                      fallback_count),
+                                    mask_bits,
+                                    self[most_frequent_netblock].ipv6addr))
+    if shared_netblock_fallback_count > 0:
+      logging.warning(('%s of fallbacks are in an IPv6 /%d with other ' +
+                       'fallbacks')%(CandidateList.describe_percentage(
+                                                shared_netblock_fallback_count,
+                                                fallback_count),
+                                     mask_bits))
+
+  # log a message about the proportion of fallbacks in each IPv4 /16
+  # and /24
+  def describe_fallback_ipv4_netblocks(self):
+    # /8 is deliberately left out:
+    # this doesn't actually tell us anything useful
+    for mask_bits in (16, 24):
+      self.describe_fallback_ipv4_netblock_mask(mask_bits)
+
+  # log a message about the proportion of IPv6 fallbacks in each IPv6
+  # /32 (LIR), /48 (Customer), and /64 (Host)
+  # https://www.iana.org/assignments/ipv6-unicast-address-assignments/
+  def describe_fallback_ipv6_netblocks(self):
+    # /12 (RIR) and /23 (smaller RIR blocks) are deliberately left out:
+    # these don't actually tell us anything useful
+    ipv6_masks = (32, 48, 64)
+    for mask_bits in ipv6_masks:
+      # each call logs its own warnings for that netblock size
+      self.describe_fallback_ipv6_netblock_mask(mask_bits)
+
+  # log the netblock proportions of the fallbacks, IPv4 first, then IPv6
+  def describe_fallback_netblocks(self):
+    for describe in (self.describe_fallback_ipv4_netblocks,
+                     self.describe_fallback_ipv6_netblocks):
+      describe()
+
+  # list the fallbacks whose IPv4 ORPort is port
+  def fallbacks_on_ipv4_orport(self, port):
+    return [f for f in self.fallbacks if f.orport == port]
+
+  # list the IPv6 fallbacks whose IPv6 ORPort is port
+  def fallbacks_on_ipv6_orport(self, port):
+    return [f for f in self.fallbacks_with_ipv6() if f.ipv6orport == port]
+
+  # list the fallbacks whose DirPort is port
+  def fallbacks_on_dirport(self, port):
+    return [f for f in self.fallbacks if f.dirport == port]
+
+  # log a message about the proportion of all fallbacks whose IPv4 ORPort
+  # is port
+  # return the number of fallbacks on that IPv4 ORPort
+  def describe_fallback_ipv4_orport(self, port):
+    matching = len(self.fallbacks_on_ipv4_orport(port))
+    total = len(self.fallbacks)
+    proportion = CandidateList.describe_percentage(matching, total)
+    message = '%s of fallbacks are on IPv4 ORPort %d'
+    logging.warning(message%(proportion, port))
+    return matching
+
+  # log a message about the proportion of IPv6 fallbacks whose IPv6 ORPort
+  # is port
+  # return the number of fallbacks on that IPv6 ORPort
+  def describe_fallback_ipv6_orport(self, port):
+    matching = len(self.fallbacks_on_ipv6_orport(port))
+    total = len(self.fallbacks_with_ipv6())
+    proportion = CandidateList.describe_percentage(matching, total)
+    message = '%s of IPv6 fallbacks are on IPv6 ORPort %d'
+    logging.warning(message%(proportion, port))
+    return matching
+
+  # log a message about the proportion of all fallbacks whose DirPort
+  # is port
+  # return the number of fallbacks on that DirPort
+  def describe_fallback_dirport(self, port):
+    matching = len(self.fallbacks_on_dirport(port))
+    total = len(self.fallbacks)
+    proportion = CandidateList.describe_percentage(matching, total)
+    message = '%s of fallbacks are on DirPort %d'
+    logging.warning(message%(proportion, port))
+    return matching
+
+  # log the proportion of fallbacks on the most common ports, and on all
+  # other ports, for the IPv4 ORPort (443, 9001), the IPv6 ORPort
+  # (443, 9001), and the DirPort (80, 9030)
+  # IPv6 ORPort proportions are relative to the fallbacks that have IPv6
+  def describe_fallback_ports(self):
+    total = len(self.fallbacks)
+    # whatever is left after the well-known ports is on some other port
+    other_ipv4_or = (total - self.describe_fallback_ipv4_orport(443)
+                     - self.describe_fallback_ipv4_orport(9001))
+    logging.warning('%s of fallbacks are on other IPv4 ORPorts'%(
+                    CandidateList.describe_percentage(other_ipv4_or, total)))
+    # IPv6 ORPorts are counted among the fallbacks that have IPv6 only
+    ipv6_total = len(self.fallbacks_with_ipv6())
+    other_ipv6_or = (ipv6_total - self.describe_fallback_ipv6_orport(443)
+                     - self.describe_fallback_ipv6_orport(9001))
+    logging.warning('%s of IPv6 fallbacks are on other IPv6 ORPorts'%(
+                    CandidateList.describe_percentage(other_ipv6_or,
+                                                      ipv6_total)))
+    # DirPorts are counted among all fallbacks
+    other_dir = (total - self.describe_fallback_dirport(80)
+                 - self.describe_fallback_dirport(9030))
+    logging.warning('%s of fallbacks are on other DirPorts'%(
+                    CandidateList.describe_percentage(other_dir, total)))
+
+  # list the fallbacks for which is_exit() is true
+  def fallbacks_with_exit(self):
+    return [f for f in self.fallbacks if f.is_exit()]
+
+  # log the proportion of fallbacks that have the Exit flag
+  def describe_fallback_exit_flag(self):
+    with_exit = len(self.fallbacks_with_exit())
+    total = len(self.fallbacks)
+    proportion = CandidateList.describe_percentage(with_exit, total)
+    # the log message is the only output: nothing is returned
+    logging.warning('%s of fallbacks have the Exit flag'%(proportion))
+
+  # list the fallbacks for which has_ipv6() is true
+  def fallbacks_with_ipv6(self):
+    return [f for f in self.fallbacks if f.has_ipv6()]
+
+  # log the proportion of fallbacks that have an IPv6 address and ORPort
+  def describe_fallback_ip_family(self):
+    with_ipv6 = len(self.fallbacks_with_ipv6())
+    total = len(self.fallbacks)
+    proportion = CandidateList.describe_percentage(with_ipv6, total)
+    # the log message is the only output: nothing is returned
+    logging.warning('%s of fallbacks are on IPv6'%(proportion))
 
-  def summarise_fallbacks(self, eligible_count, guard_count, target_count,
-                          max_count):
+  def summarise_fallbacks(self, eligible_count, operator_count, failed_count,
+                          guard_count, target_count):
     # Report:
     #  whether we checked consensus download times
     #  the number of fallback directories (and limits/exclusions, if relevant)
@@ -1399,17 +1835,23 @@ class CandidateList(dict):
     if FALLBACK_PROPORTION_OF_GUARDS is None:
       fallback_proportion = ''
     else:
-      fallback_proportion = ', Target %d (%d * %f)'%(target_count, guard_count,
-                                                 FALLBACK_PROPORTION_OF_GUARDS)
-    s += 'Final Count: %d (Eligible %d%s'%(fallback_count,
-                                           eligible_count,
+      fallback_proportion = ', Target %d (%d * %.2f)'%(target_count,
+                                                guard_count,
+                                                FALLBACK_PROPORTION_OF_GUARDS)
+    s += 'Final Count: %d (Eligible %d%s'%(fallback_count, eligible_count,
                                            fallback_proportion)
     if MAX_FALLBACK_COUNT is not None:
-      s += ', Clamped to %d'%(MAX_FALLBACK_COUNT)
+      s += ', Max %d'%(MAX_FALLBACK_COUNT)
     s += ')\n'
     if eligible_count != fallback_count:
-      s += 'Excluded:     %d (Eligible Count Exceeded Target Count)'%(
-                                              eligible_count - fallback_count)
+      removed_count = eligible_count - fallback_count
+      excess_to_target_or_max = (eligible_count - operator_count - failed_count
+                                 - fallback_count)
+      # some 'Failed' failed the check, others 'Skipped' the check,
+      # if we already had enough successful downloads
+      s += ('Excluded: %d (Same Operator %d, Failed/Skipped Download %d, ' +
+            'Excess %d)')%(removed_count, operator_count, failed_count,
+                           excess_to_target_or_max)
       s += '\n'
     min_fb = self.fallback_min()
     min_bw = min_fb._data['measured_bandwidth']
@@ -1473,18 +1915,46 @@ def list_fallbacks():
   #  print json.dumps(candidates[x]._data, sort_keys=True, indent=4,
   #                   separators=(',', ': '), default=json_util.default)
 
+  # impose mandatory conditions here, like one per contact, family, IP
+  # in measured bandwidth order
+  candidates.sort_fallbacks_by_measured_bandwidth()
+  operator_count = 0
+  # only impose these limits on the final list - operators can nominate
+  # multiple candidate fallbacks, and then we choose the best set
+  if not OUTPUT_CANDIDATES:
+    operator_count += candidates.limit_fallbacks_same_ip()
+    operator_count += candidates.limit_fallbacks_same_contact()
+    operator_count += candidates.limit_fallbacks_same_family()
+
+  # check if each candidate can serve a consensus
+  # there's a small risk we've eliminated relays from the same operator that
+  # can serve a consensus, in favour of one that can't
+  # but given it takes up to 15 seconds to check each consensus download,
+  # the risk is worth it
+  failed_count = candidates.perform_download_consensus_checks(max_count)
+
+  # analyse and log interesting diversity metrics
+  # like netblock, ports, exit, IPv4-only
+  # (we can't easily analyse AS, and it's hard to accurately analyse country)
+  candidates.describe_fallback_ip_family()
+  # if we can't import the ipaddress module, we can't do netblock analysis
+  if HAVE_IPADDRESS:
+    candidates.describe_fallback_netblocks()
+  candidates.describe_fallback_ports()
+  candidates.describe_fallback_exit_flag()
+
+  # output C comments summarising the fallback selection process
   if len(candidates.fallbacks) > 0:
-    print candidates.summarise_fallbacks(eligible_count, guard_count,
-                                         target_count, max_count)
+    print candidates.summarise_fallbacks(eligible_count, operator_count,
+                                         failed_count, guard_count,
+                                         target_count)
   else:
     print '/* No Fallbacks met criteria */'
 
+  # output C comments specifying the OnionOO data used to create the list
   for s in fetch_source_list():
     print describe_fetch_source(s)
 
-  # check if each candidate can serve a consensus
-  candidates.perform_download_consensus_checks(max_count)
-
   # if we're outputting the final fallback list, sort by fingerprint
   # this makes diffs much more stable
   # otherwise, leave sorted by bandwidth, which allows operators to be





More information about the tor-commits mailing list