commit 6a27fc62456891f2eec9e2149a3c0eb9b66f6ee9 Author: teor teor2345@gmail.com Date: Sat Dec 23 01:21:49 2017 +1100
Add an extrainfo cache flag for each fallback in a C comment
This allows stem to use fallbacks to fetch extra-info documents, rather than using authorities.
Implements ticket 22759. --- changes/ticket22759 | 5 +++ scripts/maint/updateFallbackDirs.py | 75 ++++++++++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 1 deletion(-)
diff --git a/changes/ticket22759 b/changes/ticket22759 new file mode 100644 index 000000000..a393d428a --- /dev/null +++ b/changes/ticket22759 @@ -0,0 +1,5 @@ + o Minor features (fallback directory mirrors): + - Add an extrainfo cache flag for each fallback in a C comment. + This allows stem to use fallbacks to fetch extra-info documents, + rather than using authorities. + Implements ticket 22759. diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py index 4e4000dce..d1f50c70c 100755 --- a/scripts/maint/updateFallbackDirs.py +++ b/scripts/maint/updateFallbackDirs.py @@ -47,7 +47,7 @@ import copy import re
from stem.descriptor import DocumentHandler -from stem.descriptor.remote import get_consensus +from stem.descriptor.remote import get_consensus, get_server_descriptors, MAX_FINGERPRINTS
import logging logging.root.name = '' @@ -565,6 +565,7 @@ class Candidate(object): if not self.has_ipv6(): logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,)) self._compute_version() + self._extra_info_cache = None
def _stable_sort_or_addresses(self): # replace self._data['or_addresses'] with a stable ordering, @@ -1332,6 +1333,7 @@ class Candidate(object): # "address:dirport orport=port id=fingerprint" # "[ipv6=addr:orport]" # /* nickname=name */ + # /* extrainfo={0,1} */ # , # # Do we want a C string, or a commented-out string? @@ -1362,6 +1364,14 @@ class Candidate(object): if not comment_string: s += ' */' s += '\n' + # if we know that the fallback is an extrainfo cache, flag it + # and if we don't know, assume it is not + if not comment_string: + s += '/* ' + s += 'extrainfo=%d'%(1 if self._extra_info_cache else 0) + if not comment_string: + s += ' */' + s += '\n' s += ',' if comment_string: s += '\n' @@ -1747,6 +1757,53 @@ class CandidateList(dict): self.fallbacks = family_limit_fallbacks return original_count - len(self.fallbacks)
+ # try once to get the descriptors for fingerprint_list using stem + # returns an empty list on exception + @staticmethod + def get_fallback_descriptors_once(fingerprint_list): + desc_list = get_server_descriptors(fingerprints=fingerprint_list).run(suppress=True) + return desc_list + + # try up to max_retries times to get the descriptors for fingerprint_list + # using stem. Stops retrying when all descriptors have been retrieved. + # returns a list containing the descriptors that were retrieved + @staticmethod + def get_fallback_descriptors(fingerprint_list, max_retries=5): + # we can't use stem's retries=, because we want to support more than 96 + # descriptors + # + # add an attempt for every MAX_FINGERPRINTS (or part thereof) in the list + max_retries += (len(fingerprint_list) + MAX_FINGERPRINTS - 1) / MAX_FINGERPRINTS + remaining_list = fingerprint_list + desc_list = [] + for _ in xrange(max_retries): + if len(remaining_list) == 0: + break + new_desc_list = CandidateList.get_fallback_descriptors_once(remaining_list[0:MAX_FINGERPRINTS]) + for d in new_desc_list: + try: + remaining_list.remove(d.fingerprint) + except ValueError: + # warn and ignore if a directory mirror returned a bad descriptor + logging.warning("Directory mirror returned unwanted descriptor %s, ignoring", + d.fingerprint) + continue + desc_list.append(d) + return desc_list + + # find the fallbacks that cache extra-info documents + # Onionoo doesn't know this, so we have to use stem + def mark_extra_info_caches(self): + fingerprint_list = [ f._fpr for f in self.fallbacks ] + logging.info("Downloading fallback descriptors to find extra-info caches") + desc_list = CandidateList.get_fallback_descriptors(fingerprint_list) + for d in desc_list: + self[d.fingerprint]._extra_info_cache = d.extra_info_cache + missing_descriptor_list = [ f._fpr for f in self.fallbacks + if f._extra_info_cache is None ] + for f in missing_descriptor_list: + logging.warning("No descriptor for {}. Assuming extrainfo=0.".format(f)) + # try a download check on each fallback candidate in order # stop after max_count successful downloads # but don't remove any candidates from the array @@ -1994,6 +2051,18 @@ class CandidateList(dict): CandidateList.describe_percentage(dir_count, fallback_count)))
+ # return a list of fallbacks which cache extra-info documents + def fallbacks_with_extra_info_cache(self): + return filter(lambda x: x._extra_info_cache, self.fallbacks) + + # log a message about the proportion of fallbacks that cache extra-info docs + def describe_fallback_extra_info_caches(self): + extra_info_falback_count = len(self.fallbacks_with_extra_info_cache()) + fallback_count = len(self.fallbacks) + logging.warning('%s of fallbacks cache extra-info documents'%( + CandidateList.describe_percentage(extra_info_falback_count, + fallback_count))) + # return a list of fallbacks which have the Exit flag def fallbacks_with_exit(self): return filter(lambda x: x.is_exit(), self.fallbacks) @@ -2193,6 +2262,9 @@ def list_fallbacks(whitelist, blacklist): 'This may take some time.') failed_count = candidates.perform_download_consensus_checks(max_count)
+ # work out which fallbacks cache extra-infos + candidates.mark_extra_info_caches() + # analyse and log interesting diversity metrics # like netblock, ports, exit, IPv4-only # (we can't easily analyse AS, and it's hard to accurately analyse country) @@ -2201,6 +2273,7 @@ def list_fallbacks(whitelist, blacklist): if HAVE_IPADDRESS: candidates.describe_fallback_netblocks() candidates.describe_fallback_ports() + candidates.describe_fallback_extra_info_caches() candidates.describe_fallback_exit_flag()
# output C comments summarising the fallback selection process