[tor-commits] [tor/master] Add an extrainfo cache flag for each fallback in a C comment

nickm at torproject.org nickm at torproject.org
Fri Jan 5 21:47:34 UTC 2018


commit 6a27fc62456891f2eec9e2149a3c0eb9b66f6ee9
Author: teor <teor2345 at gmail.com>
Date:   Sat Dec 23 01:21:49 2017 +1100

    Add an extrainfo cache flag for each fallback in a C comment
    
    This allows stem to use fallbacks to fetch extra-info documents,
    rather than using authorities.
    
    Implements ticket 22759.
---
 changes/ticket22759                 |  5 +++
 scripts/maint/updateFallbackDirs.py | 75 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/changes/ticket22759 b/changes/ticket22759
new file mode 100644
index 000000000..a393d428a
--- /dev/null
+++ b/changes/ticket22759
@@ -0,0 +1,5 @@
+  o Minor features (fallback directory mirrors):
+    - Add an extrainfo cache flag for each fallback in a C comment.
+      This allows stem to use fallbacks to fetch extra-info documents,
+      rather than using authorities.
+      Implements ticket 22759.
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
index 4e4000dce..d1f50c70c 100755
--- a/scripts/maint/updateFallbackDirs.py
+++ b/scripts/maint/updateFallbackDirs.py
@@ -47,7 +47,7 @@ import copy
 import re
 
 from stem.descriptor import DocumentHandler
-from stem.descriptor.remote import get_consensus
+from stem.descriptor.remote import get_consensus, get_server_descriptors, MAX_FINGERPRINTS
 
 import logging
 logging.root.name = ''
@@ -565,6 +565,7 @@ class Candidate(object):
     if not self.has_ipv6():
       logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
     self._compute_version()
+    self._extra_info_cache = None
 
   def _stable_sort_or_addresses(self):
     # replace self._data['or_addresses'] with a stable ordering,
@@ -1332,6 +1333,7 @@ class Candidate(object):
     # "address:dirport orport=port id=fingerprint"
     # "[ipv6=addr:orport]"
     # /* nickname=name */
+    # /* extrainfo={0,1} */
     # ,
     #
     # Do we want a C string, or a commented-out string?
@@ -1362,6 +1364,14 @@ class Candidate(object):
     if not comment_string:
       s += ' */'
     s += '\n'
+    # if we know that the fallback is an extrainfo cache, flag it
+    # and if we don't know, assume it is not
+    if not comment_string:
+      s += '/* '
+    s += 'extrainfo=%d'%(1 if self._extra_info_cache else 0)
+    if not comment_string:
+      s += ' */'
+    s += '\n'
     s += ','
     if comment_string:
       s += '\n'
@@ -1747,6 +1757,53 @@ class CandidateList(dict):
     self.fallbacks = family_limit_fallbacks
     return original_count - len(self.fallbacks)
 
+  # try once to get the descriptors for fingerprint_list using stem
+  # returns an empty list on exception
+  @staticmethod
+  def get_fallback_descriptors_once(fingerprint_list):
+    desc_list = get_server_descriptors(fingerprints=fingerprint_list).run(suppress=True)
+    return desc_list
+
+  # try up to max_retries times to get the descriptors for fingerprint_list
+  # using stem. Stops retrying when all descriptors have been retrieved.
+  # returns a list containing the descriptors that were retrieved
+  @staticmethod
+  def get_fallback_descriptors(fingerprint_list, max_retries=5):
+    # we can't use stem's retries=, because we want to support more than 96
+    # descriptors
+    #
+    # add an attempt for every MAX_FINGERPRINTS (or part thereof) in the list
+    max_retries += (len(fingerprint_list) + MAX_FINGERPRINTS - 1) / MAX_FINGERPRINTS
+    remaining_list = fingerprint_list
+    desc_list = []
+    for _ in xrange(max_retries):
+      if len(remaining_list) == 0:
+        break
+      new_desc_list = CandidateList.get_fallback_descriptors_once(remaining_list[0:MAX_FINGERPRINTS])
+      for d in new_desc_list:
+        try:
+          remaining_list.remove(d.fingerprint)
+        except ValueError:
+          # warn and ignore if a directory mirror returned a bad descriptor
+          logging.warning("Directory mirror returned unwanted descriptor %s, ignoring",
+                          d.fingerprint)
+          continue
+        desc_list.append(d)
+    return desc_list
+
+  # find the fallbacks that cache extra-info documents
+  # Onionoo doesn't know this, so we have to use stem
+  def mark_extra_info_caches(self):
+    fingerprint_list = [ f._fpr for f in self.fallbacks ]
+    logging.info("Downloading fallback descriptors to find extra-info caches")
+    desc_list = CandidateList.get_fallback_descriptors(fingerprint_list)
+    for d in desc_list:
+      self[d.fingerprint]._extra_info_cache = d.extra_info_cache
+    missing_descriptor_list = [ f._fpr for f in self.fallbacks
+                                if f._extra_info_cache is None ]
+    for f in missing_descriptor_list:
+      logging.warning("No descriptor for {}. Assuming extrainfo=0.".format(f))
+
   # try a download check on each fallback candidate in order
   # stop after max_count successful downloads
   # but don't remove any candidates from the array
@@ -1994,6 +2051,18 @@ class CandidateList(dict):
                     CandidateList.describe_percentage(dir_count,
                                                       fallback_count)))
 
+  # return a list of fallbacks which cache extra-info documents
+  def fallbacks_with_extra_info_cache(self):
+    return filter(lambda x: x._extra_info_cache, self.fallbacks)
+
+  # log a message about the proportion of fallbacks that cache extra-info docs
+  def describe_fallback_extra_info_caches(self):
+    extra_info_falback_count = len(self.fallbacks_with_extra_info_cache())
+    fallback_count = len(self.fallbacks)
+    logging.warning('%s of fallbacks cache extra-info documents'%(
+                    CandidateList.describe_percentage(extra_info_falback_count,
+                                                      fallback_count)))
+
   # return a list of fallbacks which have the Exit flag
   def fallbacks_with_exit(self):
     return filter(lambda x: x.is_exit(), self.fallbacks)
@@ -2193,6 +2262,9 @@ def list_fallbacks(whitelist, blacklist):
                     'This may take some time.')
   failed_count = candidates.perform_download_consensus_checks(max_count)
 
+  # work out which fallbacks cache extra-infos
+  candidates.mark_extra_info_caches()
+
   # analyse and log interesting diversity metrics
   # like netblock, ports, exit, IPv4-only
   # (we can't easily analyse AS, and it's hard to accurately analyse country)
@@ -2201,6 +2273,7 @@ def list_fallbacks(whitelist, blacklist):
   if HAVE_IPADDRESS:
     candidates.describe_fallback_netblocks()
   candidates.describe_fallback_ports()
+  candidates.describe_fallback_extra_info_caches()
   candidates.describe_fallback_exit_flag()
 
   # output C comments summarising the fallback selection process





More information about the tor-commits mailing list