commit 7248afc02a8195d2cead58b50b843576038153f0 Author: juga0 juga@riseup.net Date: Tue Mar 16 19:08:06 2021 +0000
fix: Reformat all with black --- sbws/__init__.py | 10 +- sbws/_version.py | 164 +++-- sbws/core/cleanup.py | 104 ++-- sbws/core/generate.py | 224 ++++--- sbws/core/scanner.py | 504 ++++++++++------ sbws/core/stats.py | 130 ++-- sbws/globals.py | 79 +-- sbws/lib/bwfile_health.py | 4 +- sbws/lib/circuitbuilder.py | 39 +- sbws/lib/destination.py | 246 +++++--- sbws/lib/heartbeat.py | 17 +- sbws/lib/relaylist.py | 160 +++-- sbws/lib/relayprioritizer.py | 51 +- sbws/lib/resultdump.py | 558 ++++++++++------- sbws/lib/scaling.py | 3 +- sbws/lib/v3bwfile.py | 805 +++++++++++++++---------- sbws/sbws.py | 47 +- sbws/util/config.py | 542 ++++++++++------- sbws/util/filelock.py | 27 +- sbws/util/fs.py | 80 +-- sbws/util/iso3166.py | 273 ++++++++- sbws/util/parser.py | 24 +- sbws/util/requests.py | 17 +- sbws/util/state.py | 4 +- sbws/util/stem.py | 151 +++-- sbws/util/userquery.py | 25 +- tests/conftest.py | 13 +- tests/integration/conftest.py | 49 +- tests/integration/core/test_scanner.py | 43 +- tests/integration/lib/test_destination.py | 72 ++- tests/integration/lib/test_relaylist.py | 13 +- tests/integration/lib/test_relayprioritizer.py | 52 +- tests/integration/util/test_requests.py | 12 +- tests/integration/util/test_stem.py | 8 +- tests/unit/conftest.py | 196 +++--- tests/unit/core/test_generate.py | 16 +- tests/unit/core/test_stats.py | 51 +- tests/unit/lib/test_destination.py | 2 +- tests/unit/lib/test_heartbeat.py | 8 +- tests/unit/lib/test_relaylist.py | 4 +- tests/unit/lib/test_resultdump.py | 47 +- tests/unit/lib/test_results.py | 215 ++++--- tests/unit/lib/test_scaling.py | 23 +- tests/unit/lib/test_v3bwfile.py | 499 ++++++++------- tests/unit/test_bwfile_health.py | 12 +- tests/unit/util/test_config.py | 322 ++++++---- tests/unit/util/test_state.py | 64 +- tests/unit/util/test_stem.py | 15 +- tests/unit/util/test_timestamp.py | 18 +- tests/unit/util/test_userquery.py | 82 +-- 50 files changed, 3768 insertions(+), 2356 deletions(-)
diff --git a/sbws/__init__.py b/sbws/__init__.py index 4e6608e..80de913 100644 --- a/sbws/__init__.py +++ b/sbws/__init__.py @@ -1,5 +1,6 @@ from ._version import get_versions -__version__ = get_versions()['version'] + +__version__ = get_versions()["version"] del get_versions
import threading # noqa @@ -19,6 +20,7 @@ class Settings: should be initialized here.
""" + def __init__(self): # update this dict from globals (but only for ALL_CAPS settings) for setting in dir(globals): @@ -27,9 +29,9 @@ class Settings: self.end_event = threading.Event()
def init_http_headers(self, nickname, uuid, tor_version): - self.HTTP_HEADERS['Tor-Bandwidth-Scanner-Nickname'] = nickname - self.HTTP_HEADERS['Tor-Bandwidth-Scanner-UUID'] = uuid - self.HTTP_HEADERS['User-Agent'] += tor_version + self.HTTP_HEADERS["Tor-Bandwidth-Scanner-Nickname"] = nickname + self.HTTP_HEADERS["Tor-Bandwidth-Scanner-UUID"] = uuid + self.HTTP_HEADERS["User-Agent"] += tor_version
def set_end_event(self): self.end_event.set() diff --git a/sbws/_version.py b/sbws/_version.py index cbf260a..052f2b0 100644 --- a/sbws/_version.py +++ b/sbws/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -58,17 +57,20 @@ HANDLERS = {}
def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands, args, cwd=None, verbose=False, hide_stderr=False, env=None +): """Call the given command(s).""" assert isinstance(commands, list) p = None @@ -76,10 +78,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -114,16 +119,22 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } else: rootdirs.append(root) root = os.path.dirname(root) # up a level
if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@@ -183,7 +194,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -192,7 +203,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -200,19 +211,26 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + }
@register_vcs_handler("git", "pieces_from_vcs") @@ -227,8 +245,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"]
- out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + out, rc = run_command( + GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True + ) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -236,10 +255,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -262,17 +290,18 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")]
# now we have TAG-NUM-gHEX or HEX
if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = ( + "unable to parse git-describe output: '%s'" % describe_out + ) return pieces
# tag @@ -281,10 +310,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :]
# distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -295,13 +326,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) + count_out, rc = run_command( + GITS, ["rev-list", "HEAD", "--count"], cwd=root + ) pieces["distance"] = int(count_out) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] @@ -335,8 +368,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -450,11 +482,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + }
if not style or style == "default": style = "pep440" # the default @@ -474,9 +508,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style)
- return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + }
def get_versions(): @@ -490,8 +528,9 @@ def get_versions(): verbose = cfg.verbose
try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords( + get_keywords(), cfg.tag_prefix, verbose + ) except NotThisMethod: pass
@@ -500,13 +539,16 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): + for i in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + }
try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -520,6 +562,10 @@ def get_versions(): except NotThisMethod: pass
- return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/sbws/core/cleanup.py b/sbws/core/cleanup.py index 1d21989..8b7e6fd 100644 --- a/sbws/core/cleanup.py +++ b/sbws/core/cleanup.py @@ -17,22 +17,30 @@ log = logging.getLogger(__name__)
def gen_parser(sub): - ''' + """ Helper function for the broader argument parser generating code that adds in all the possible command line arguments for the cleanup command.
:param argparse._SubParsersAction sub: what to add a sub-parser to - ''' - d = 'Compress and delete results and/or v3bw files old files.' \ - 'Configuration options are read to determine which are old files' - p = sub.add_parser('cleanup', description=d, - formatter_class=ArgumentDefaultsHelpFormatter) - p.add_argument('--dry-run', action='store_true', - help='Don't actually compress or delete anything') - p.add_argument('--no-results', action='store_true', - help='Do not clean results files') - p.add_argument('--no-v3bw', action='store_true', - help='Do not clean v3bw files') + """ + d = ( + "Compress and delete results and/or v3bw files old files." + "Configuration options are read to determine which are old files" + ) + p = sub.add_parser( + "cleanup", description=d, formatter_class=ArgumentDefaultsHelpFormatter + ) + p.add_argument( + "--dry-run", + action="store_true", + help="Don't actually compress or delete anything", + ) + p.add_argument( + "--no-results", action="store_true", help="Do not clean results files" + ) + p.add_argument( + "--no-v3bw", action="store_true", help="Do not clean v3bw files" + )
def _get_files_mtime_older_than(dname, days_delta, extensions): @@ -43,7 +51,7 @@ def _get_files_mtime_older_than(dname, days_delta, extensions): assert isinstance(extensions, list) for ext in extensions: assert isinstance(ext, str) - assert ext[0] == '.' + assert ext[0] == "." # Determine oldest allowed date today = datetime.utcfromtimestamp(time.time()) oldest_day = today - timedelta(days=days_delta) @@ -52,13 +60,17 @@ def _get_files_mtime_older_than(dname, days_delta, extensions): fname = os.path.join(root, f) _, ext = os.path.splitext(fname) if ext not in extensions: - log.debug('Ignoring %s because its extension is not in ' - '%s', fname, extensions) + log.debug( + "Ignoring %s because its extension is not in " "%s", + fname, + extensions, + ) continue # using file modification time instead of parsing the name # of the file. filedt = unixts_to_dt_obj( - os.stat(fname, follow_symlinks=False).st_mtime) + os.stat(fname, follow_symlinks=False).st_mtime + ) if filedt < oldest_day: yield fname
@@ -69,7 +81,7 @@ def _delete_files(dname, files, dry_run=True): assert isinstance(files, types.GeneratorType) with DirectoryLock(dname): for fname in files: - log.info('Deleting %s', fname) + log.info("Deleting %s", fname) assert os.path.commonprefix([dname, fname]) == dname if not dry_run: os.remove(fname) @@ -81,13 +93,13 @@ def _compress_files(dname, files, dry_run=True): assert isinstance(files, types.GeneratorType) with DirectoryLock(dname): for fname in files: - log.info('Compressing %s', fname) + log.info("Compressing %s", fname) assert os.path.commonprefix([dname, fname]) == dname if dry_run: continue - with open(fname, 'rt') as in_fd: - out_fname = fname + '.gz' - with gzip.open(out_fname, 'wt') as out_fd: + with open(fname, "rt") as in_fd: + out_fname = fname + ".gz" + with gzip.open(out_fname, "wt") as out_fd: shutil.copyfileobj(in_fd, out_fd) os.remove(fname)
@@ -95,60 +107,64 @@ def _compress_files(dname, files, dry_run=True): def _check_validity_periods_v3bw(compress_after_days, delete_after_days): if 1 <= compress_after_days and compress_after_days < delete_after_days: return True - fail_hard("v3bw files should only be compressed after 1 day and deleted " - "after a bigger number of days.") + fail_hard( + "v3bw files should only be compressed after 1 day and deleted " + "after a bigger number of days." + )
def _clean_v3bw_files(args, conf): - v3bw_dname = conf.getpath('paths', 'v3bw_dname') + v3bw_dname = conf.getpath("paths", "v3bw_dname") if not os.path.isdir(v3bw_dname): - fail_hard('%s does not exist', v3bw_dname) - compress_after_days = conf.getint('cleanup', - 'v3bw_files_compress_after_days') - delete_after_days = conf.getint('cleanup', - 'v3bw_files_delete_after_days') + fail_hard("%s does not exist", v3bw_dname) + compress_after_days = conf.getint( + "cleanup", "v3bw_files_compress_after_days" + ) + delete_after_days = conf.getint("cleanup", "v3bw_files_delete_after_days") _check_validity_periods_v3bw(compress_after_days, delete_after_days) # first delete so that the files to be deleted are not compressed first - files_to_delete = _get_files_mtime_older_than(v3bw_dname, - delete_after_days, - ['.v3bw', '.gz']) + files_to_delete = _get_files_mtime_older_than( + v3bw_dname, delete_after_days, [".v3bw", ".gz"] + ) _delete_files(v3bw_dname, files_to_delete, dry_run=args.dry_run) - files_to_compress = _get_files_mtime_older_than(v3bw_dname, - compress_after_days, - ['.v3bw']) + files_to_compress = _get_files_mtime_older_than( + v3bw_dname, compress_after_days, [".v3bw"] + ) # when dry_run is true, compress will also show all the files that # would have been deleted, since they are not really deleted _compress_files(v3bw_dname, files_to_compress, dry_run=args.dry_run)
def _clean_result_files(args, conf): - datadir = conf.getpath('paths', 'datadir') + datadir = conf.getpath("paths", "datadir") if not os.path.isdir(datadir): - fail_hard('%s does not exist', datadir) + fail_hard("%s does not exist", datadir) compress_after_days = conf.getint( - 'cleanup', 'data_files_compress_after_days') - delete_after_days = conf.getint( - 'cleanup', 'data_files_delete_after_days') + "cleanup", "data_files_compress_after_days" + ) + delete_after_days = conf.getint("cleanup", "data_files_delete_after_days")
# first delete so that the files to be deleted are not compressed first files_to_delete = _get_files_mtime_older_than( - datadir, delete_after_days, ['.txt', '.gz']) + datadir, delete_after_days, [".txt", ".gz"] + ) _delete_files(datadir, files_to_delete, dry_run=args.dry_run)
# when dry_run is true, compress will also show all the files that # would have been deleted, since they are not really deleted files_to_compress = _get_files_mtime_older_than( - datadir, compress_after_days, ['.txt']) + datadir, compress_after_days, [".txt"] + ) _compress_files(datadir, files_to_compress, dry_run=args.dry_run)
def main(args, conf): - ''' + """ Main entry point in to the cleanup command.
:param argparse.Namespace args: command line arguments :param configparser.ConfigParser conf: parsed config files - ''' + """ if not args.no_results: _clean_result_files(args, conf)
diff --git a/sbws/core/generate.py b/sbws/core/generate.py index 8fa8ec4..55bf719 100644 --- a/sbws/core/generate.py +++ b/sbws/core/generate.py @@ -1,8 +1,16 @@ from math import ceil
-from sbws.globals import (fail_hard, SBWS_SCALE_CONSTANT, TORFLOW_SCALING, - SBWS_SCALING, TORFLOW_BW_MARGIN, PROP276_ROUND_DIG, - DAY_SECS, NUM_MIN_RESULTS, GENERATE_PERIOD) +from sbws.globals import ( + fail_hard, + SBWS_SCALE_CONSTANT, + TORFLOW_SCALING, + SBWS_SCALING, + TORFLOW_BW_MARGIN, + PROP276_ROUND_DIG, + DAY_SECS, + NUM_MIN_RESULTS, + GENERATE_PERIOD, +) from sbws.lib.v3bwfile import V3BWFile from sbws.lib.resultdump import load_recent_results_in_datadir from argparse import ArgumentDefaultsHelpFormatter @@ -15,72 +23,119 @@ log = logging.getLogger(__name__)
def gen_parser(sub): - d = 'Generate a v3bw file based on recent results. A v3bw file is the '\ - 'file Tor directory authorities want to read and base their '\ - 'bandwidth votes on. '\ - 'To avoid inconsistent reads, configure tor with '\ - '"V3BandwidthsFile /path/to/latest.v3bw". '\ - '(latest.v3bw is an atomically created symlink in the same '\ - 'directory as output.) '\ - 'If the file is transferred to another host, it should be written to '\ - 'a temporary path, then renamed to the V3BandwidthsFile path.\n'\ - 'The default scaling method is torflow's one. To use different'\ - 'scaling methods or no scaling, see the options.' - p = sub.add_parser('generate', description=d, - formatter_class=ArgumentDefaultsHelpFormatter) - p.add_argument('--output', default=None, type=str, - help='If specified, write the v3bw here instead of what is' - 'specified in the configuration') + d = ( + "Generate a v3bw file based on recent results. A v3bw file is the " + "file Tor directory authorities want to read and base their " + "bandwidth votes on. " + "To avoid inconsistent reads, configure tor with " + '"V3BandwidthsFile /path/to/latest.v3bw". ' + "(latest.v3bw is an atomically created symlink in the same " + "directory as output.) " + "If the file is transferred to another host, it should be written to " + "a temporary path, then renamed to the V3BandwidthsFile path.\n" + "The default scaling method is torflow's one. To use different" + "scaling methods or no scaling, see the options." + ) + p = sub.add_parser( + "generate", + description=d, + formatter_class=ArgumentDefaultsHelpFormatter, + ) + p.add_argument( + "--output", + default=None, + type=str, + help="If specified, write the v3bw here instead of what is" + "specified in the configuration", + ) # The reason for --scale-constant defaulting to 7500 is because at one # time, torflow happened to generate output that averaged to 7500 bw units # per relay. We wanted the ability to try to be like torflow. See # https://lists.torproject.org/pipermail/tor-dev/2018-March/013049.html - p.add_argument('--scale-constant', default=SBWS_SCALE_CONSTANT, type=int, - help='When scaling bw weights, scale them using this const ' - 'multiplied by the number of measured relays') - p.add_argument('--scale-sbws', action='store_true', - help='If specified, do not use bandwidth values as they ' - 'are, but scale them such that we have a budget of ' - 'scale_constant * num_measured_relays = bandwidth to give ' - 'out, and we do so proportionally') - p.add_argument('-t', '--scale-torflow', action='store_true', - default=True, - help='If specified, scale measurements using torflow's ' - 'method. This option is kept for compatibility with older ' - 'versions and it is silently ignored, since it is the ' - 'default.') - p.add_argument('-w', '--raw', action='store_true', - help='If specified, do use bandwidth raw measurements ' - 'without any scaling.') - p.add_argument('-m', '--torflow-bw-margin', default=TORFLOW_BW_MARGIN, - type=float, - help="Cap maximum bw when scaling as Torflow. ") - p.add_argument('-r', '--round-digs', '--torflow-round-digs', - default=PROP276_ROUND_DIG, type=int, - help="Number of most significant digits to round bw.") - p.add_argument('-p', '--secs-recent', default=None, type=int, - help="How many secs in the past are results being " - "still considered. Default is {} secs. If not scaling " - "as Torflow the default is data_period in the " - "configuration.".format(GENERATE_PERIOD)) - p.add_argument('-a', '--secs-away', default=DAY_SECS, type=int, - help="How many secs results have to be away from each " - "other.") - p.add_argument('-n', '--min-num', default=NUM_MIN_RESULTS, type=int, - help="Mininum number of a results to consider them.") + p.add_argument( + "--scale-constant", + default=SBWS_SCALE_CONSTANT, + type=int, + help="When scaling bw weights, scale them using this const " + "multiplied by the number of measured relays", + ) + p.add_argument( + "--scale-sbws", + action="store_true", + help="If specified, do not use bandwidth values as they " + "are, but scale them such that we have a budget of " + "scale_constant * num_measured_relays = bandwidth to give " + "out, and we do so proportionally", + ) + p.add_argument( + "-t", + "--scale-torflow", + action="store_true", + default=True, + help="If specified, scale measurements using torflow's " + "method. This option is kept for compatibility with older " + "versions and it is silently ignored, since it is the " + "default.", + ) + p.add_argument( + "-w", + "--raw", + action="store_true", + help="If specified, do use bandwidth raw measurements " + "without any scaling.", + ) + p.add_argument( + "-m", + "--torflow-bw-margin", + default=TORFLOW_BW_MARGIN, + type=float, + help="Cap maximum bw when scaling as Torflow. ", + ) + p.add_argument( + "-r", + "--round-digs", + "--torflow-round-digs", + default=PROP276_ROUND_DIG, + type=int, + help="Number of most significant digits to round bw.", + ) + p.add_argument( + "-p", + "--secs-recent", + default=None, + type=int, + help="How many secs in the past are results being " + "still considered. Default is {} secs. If not scaling " + "as Torflow the default is data_period in the " + "configuration.".format(GENERATE_PERIOD), + ) + p.add_argument( + "-a", + "--secs-away", + default=DAY_SECS, + type=int, + help="How many secs results have to be away from each " "other.", + ) + p.add_argument( + "-n", + "--min-num", + default=NUM_MIN_RESULTS, + type=int, + help="Mininum number of a results to consider them.", + ) return p
def main(args, conf): - os.makedirs(conf.getpath('paths', 'v3bw_dname'), exist_ok=True) + os.makedirs(conf.getpath("paths", "v3bw_dname"), exist_ok=True)
- datadir = conf.getpath('paths', 'datadir') + datadir = conf.getpath("paths", "datadir") if not os.path.isdir(datadir): - fail_hard('%s does not exist', datadir) + fail_hard("%s does not exist", datadir) if args.scale_constant < 1: - fail_hard('--scale-constant must be positive') + fail_hard("--scale-constant must be positive") if args.torflow_bw_margin < 0: - fail_hard('toflow-bw-margin must be major than 0.') + fail_hard("toflow-bw-margin must be major than 0.") if args.scale_sbws: scaling_method = SBWS_SCALING elif args.raw: @@ -94,34 +149,45 @@ def main(args, conf): elif scaling_method == TORFLOW_SCALING: fresh_days = ceil(GENERATE_PERIOD / 24 / 60 / 60) else: - fresh_days = conf.getint('general', 'data_period') - reset_bw_ipv4_changes = conf.getboolean('general', 'reset_bw_ipv4_changes') - reset_bw_ipv6_changes = conf.getboolean('general', 'reset_bw_ipv6_changes') + fresh_days = conf.getint("general", "data_period") + reset_bw_ipv4_changes = conf.getboolean("general", "reset_bw_ipv4_changes") + reset_bw_ipv6_changes = conf.getboolean("general", "reset_bw_ipv6_changes") results = load_recent_results_in_datadir( - fresh_days, datadir, + fresh_days, + datadir, on_changed_ipv4=reset_bw_ipv4_changes, - on_changed_ipv6=reset_bw_ipv6_changes) + on_changed_ipv6=reset_bw_ipv6_changes, + ) if len(results) < 1: - log.warning('No recent results, so not generating anything. (Have you ' - 'ran sbws scanner recently?)') + log.warning( + "No recent results, so not generating anything. (Have you " + "ran sbws scanner recently?)" + ) return - state_fpath = conf.getpath('paths', 'state_fname') - consensus_path = os.path.join(conf.getpath('tor', 'datadir'), - "cached-consensus") + state_fpath = conf.getpath("paths", "state_fname") + consensus_path = os.path.join( + conf.getpath("tor", "datadir"), "cached-consensus" + ) # Accept None as scanner_country to be compatible with older versions. - scanner_country = conf['scanner'].get('country') + scanner_country = conf["scanner"].get("country") destinations_countries = destination.parse_destinations_countries(conf) - bw_file = V3BWFile.from_results(results, scanner_country, - destinations_countries, state_fpath, - args.scale_constant, scaling_method, - torflow_cap=args.torflow_bw_margin, - round_digs=args.round_digs, - secs_recent=args.secs_recent, - secs_away=args.secs_away, - min_num=args.min_num, - consensus_path=consensus_path) + bw_file = V3BWFile.from_results( + results, + scanner_country, + destinations_countries, + state_fpath, + args.scale_constant, + scaling_method, + torflow_cap=args.torflow_bw_margin, + round_digs=args.round_digs, + secs_recent=args.secs_recent, + secs_away=args.secs_away, + min_num=args.min_num, + consensus_path=consensus_path, + )
- output = args.output or \ - conf.getpath('paths', 'v3bw_fname').format(now_fname()) + output = args.output or conf.getpath("paths", "v3bw_fname").format( + now_fname() + ) bw_file.write(output) bw_file.info_stats diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py index 2fba10f..ab5a99a 100644 --- a/sbws/core/scanner.py +++ b/sbws/core/scanner.py @@ -1,4 +1,4 @@ -''' Measure the relays. ''' +""" Measure the relays. """ import queue
import signal @@ -12,13 +12,19 @@ from multiprocessing.context import TimeoutError from ..lib.circuitbuilder import GapsCircuitBuilder as CB from ..lib.resultdump import ResultDump from ..lib.resultdump import ( - ResultSuccess, ResultErrorCircuit, ResultErrorStream, - ResultErrorSecondRelay, ResultError, ResultErrorDestination - ) + ResultSuccess, + ResultErrorCircuit, + ResultErrorStream, + ResultErrorSecondRelay, + ResultError, + ResultErrorDestination, +) from ..lib.relaylist import RelayList from ..lib.relayprioritizer import RelayPrioritizer -from ..lib.destination import (DestinationList, - connect_to_destination_over_circuit) +from ..lib.destination import ( + DestinationList, + connect_to_destination_over_circuit, +) from ..util.timestamp import now_isodt_str from ..util.state import State from sbws.globals import fail_hard, HTTP_GET_HEADERS, TIMEOUT_MEASUREMENTS @@ -50,7 +56,7 @@ traceback."""
def stop_threads(signal, frame, exit_code=0): global rd, pool - log.debug('Stopping sbws.') + log.debug("Stopping sbws.") # Avoid new threads to start. settings.set_end_event() # Stop Pool threads @@ -70,13 +76,15 @@ def dumpstacks(): log.critical(FILLUP_TICKET_MSG) thread_id2name = dict([(t.ident, t.name) for t in threading.enumerate()]) for thread_id, stack in sys._current_frames().items(): - log.critical("Thread: %s(%d)", - thread_id2name.get(thread_id, ""), thread_id) + log.critical( + "Thread: %s(%d)", thread_id2name.get(thread_id, ""), thread_id + ) log.critical(traceback.format_stack("".join(stack))) # If logging level is less than DEBUG (more verbose), start pdb so that # developers can debug the issue. if log.getEffectiveLevel() < logging.DEBUG: import pdb + pdb.set_trace() # Otherwise exit. else: @@ -85,12 +93,12 @@ def dumpstacks():
def timed_recv_from_server(session, dest, byte_range): - ''' Request the **byte_range** from the URL at **dest**. If successful, + """Request the **byte_range** from the URL at **dest**. If successful, return True and the time it took to download. Otherwise return False and an - exception. ''' + exception."""
start_time = time.time() - HTTP_GET_HEADERS['Range'] = byte_range + HTTP_GET_HEADERS["Range"] = byte_range # - response.elapsed "measures the time taken between sending the first # byte of the request and finishing parsing the headers. # It is therefore unaffected by consuming the response content" @@ -111,13 +119,13 @@ def timed_recv_from_server(session, dest, byte_range):
def get_random_range_string(content_length, size): - ''' + """ Return a random range of bytes of length **size**. **content_length** is the size of the file we will be requesting a range of bytes from.
For example, for content_length of 100 and size 10, this function will return one of the following: '0-9', '1-10', '2-11', [...] '89-98', '90-99' - ''' + """ assert size <= content_length # start can be anywhere in the content_length as long as it is **size** # bytes away from the end or more. Because range is [start, end) (doesn't @@ -131,11 +139,11 @@ def get_random_range_string(content_length, size): # of the content. For example, if content_length is 10, end could be # anywhere from 0 to 9. assert end < content_length - return 'bytes={}-{}'.format(start, end) + return "bytes={}-{}".format(start, end)
def measure_rtt_to_server(session, conf, dest, content_length): - ''' Make multiple end-to-end RTT measurements by making small HTTP requests + """Make multiple end-to-end RTT measurements by making small HTTP requests over a circuit + stream that should already exist, persist, and not need rebuilding. If something goes wrong and not all of the RTT measurements can be made, return None. Otherwise return a list of the RTTs (in seconds). @@ -143,18 +151,21 @@ def measure_rtt_to_server(session, conf, dest, content_length): :returns tuple: results or None if the if the measurement fail. None or exception if the measurement fail.
- ''' + """ rtts = [] - size = conf.getint('scanner', 'min_download_size') - for _ in range(0, conf.getint('scanner', 'num_rtts')): - log.debug('Measuring RTT to %s', dest.url) + size = conf.getint("scanner", "min_download_size") + for _ in range(0, conf.getint("scanner", "num_rtts")): + log.debug("Measuring RTT to %s", dest.url) random_range = get_random_range_string(content_length, size) success, data = timed_recv_from_server(session, dest, random_range) if not success: # data is an exception - log.debug('While measuring the RTT to %s we hit an exception ' - '(does the webserver support Range requests?): %s', - dest.url, data) + log.debug( + "While measuring the RTT to %s we hit an exception " + "(does the webserver support Range requests?): %s", + dest.url, + data, + ) return None, data assert success # data is an RTT @@ -170,15 +181,15 @@ def measure_bandwidth_to_server(session, conf, dest, content_length):
""" results = [] - num_downloads = conf.getint('scanner', 'num_downloads') - expected_amount = conf.getint('scanner', 'initial_read_request') - min_dl = conf.getint('scanner', 'min_download_size') - max_dl = conf.getint('scanner', 'max_download_size') + num_downloads = conf.getint("scanner", "num_downloads") + expected_amount = conf.getint("scanner", "initial_read_request") + min_dl = conf.getint("scanner", "min_download_size") + max_dl = conf.getint("scanner", "max_download_size") download_times = { - 'toofast': conf.getfloat('scanner', 'download_toofast'), - 'min': conf.getfloat('scanner', 'download_min'), - 'target': conf.getfloat('scanner', 'download_target'), - 'max': conf.getfloat('scanner', 'download_max'), + "toofast": conf.getfloat("scanner", "download_toofast"), + "min": conf.getfloat("scanner", "download_min"), + "target": conf.getfloat("scanner", "download_target"), + "max": conf.getfloat("scanner", "download_max"), } while len(results) < num_downloads and not settings.end_event.is_set(): assert expected_amount >= min_dl @@ -187,47 +198,58 @@ def measure_bandwidth_to_server(session, conf, dest, content_length): success, data = timed_recv_from_server(session, dest, random_range) if not success: # data is an exception - log.debug('While measuring the bandwidth to %s we hit an ' - 'exception (does the webserver support Range ' - 'requests?): %s', dest.url, data) + log.debug( + "While measuring the bandwidth to %s we hit an " + "exception (does the webserver support Range " + "requests?): %s", + dest.url, + data, + ) return None, data assert success # data is a download time assert isinstance(data, float) or isinstance(data, int) if _should_keep_result( - expected_amount == max_dl, data, download_times): - results.append({ - 'duration': data, 'amount': expected_amount}) + expected_amount == max_dl, data, download_times + ): + results.append({"duration": data, "amount": expected_amount}) expected_amount = _next_expected_amount( - expected_amount, data, download_times, min_dl, max_dl) + expected_amount, data, download_times, min_dl, max_dl + ) return results, None
def _pick_ideal_second_hop(relay, dest, rl, cont, is_exit): - ''' + """ Sbws builds two hop circuits. Given the **relay** to measure with destination **dest**, pick a second relay that is or is not an exit according to **is_exit**. - ''' + """ # 40041: Instead of using exits that can exit to all IPs, to ensure that # they can make requests to the Web servers, try with the exits that # allow some IPs, since there're more. # In the case that a concrete exit can't exit to the Web server, it is not # a problem since the relay will be measured in the next loop with other # random exit. - candidates = rl.exits_not_bad_allowing_port(dest.port) \ - if is_exit else rl.non_exits + candidates = ( + rl.exits_not_bad_allowing_port(dest.port) if is_exit else rl.non_exits + ) if not len(candidates): return None # In the case the helper is an exit, the entry could be an exit too # (#40041), so ensure the helper is not the same as the entry, likely to # happen in a test network. if is_exit: - candidates = [c for c in candidates - if c.fingerprint != relay.fingerprint] + candidates = [ + c for c in candidates if c.fingerprint != relay.fingerprint + ] min_relay_bw = rl.exit_min_bw() if is_exit else rl.non_exit_min_bw() - log.debug('Picking a 2nd hop to measure %s from %d choices. is_exit=%s', - relay.nickname, len(candidates), is_exit) + log.debug( + "Picking a 2nd hop to measure %s from %d choices. is_exit=%s", + relay.nickname, + len(candidates), + is_exit, + ) for min_bw_factor in [2, 1.75, 1.5, 1.25, 1]: min_bw = relay.consensus_bandwidth * min_bw_factor # We might have a really slow/new relay. Try to measure it properly by @@ -236,34 +258,44 @@ def _pick_ideal_second_hop(relay, dest, rl, cont, is_exit): if min_bw < min_relay_bw: min_bw = min_relay_bw new_candidates = stem_utils.only_relays_with_bandwidth( - cont, candidates, min_bw=min_bw) + cont, candidates, min_bw=min_bw + ) if len(new_candidates) > 0: chosen = rng.choice(new_candidates) log.debug( - 'Found %d candidate 2nd hops with at least %sx the bandwidth ' - 'of %s. Returning %s (bw=%s).', - len(new_candidates), min_bw_factor, relay.nickname, - chosen.nickname, chosen.consensus_bandwidth) + "Found %d candidate 2nd hops with at least %sx the bandwidth " + "of %s. Returning %s (bw=%s).", + len(new_candidates), + min_bw_factor, + relay.nickname, + chosen.nickname, + chosen.consensus_bandwidth, + ) return chosen - candidates = sorted(candidates, key=lambda r: r.consensus_bandwidth, - reverse=True) + candidates = sorted( + candidates, key=lambda r: r.consensus_bandwidth, reverse=True + ) chosen = candidates[0] log.debug( - 'Didn't find any 2nd hops at least as fast as %s (bw=%s). It's ' - 'probably really fast. Returning %s (bw=%s), the fastest ' - 'candidate we have.', relay.nickname, relay.consensus_bandwidth, - chosen.nickname, chosen.consensus_bandwidth) + "Didn't find any 2nd hops at least as fast as %s (bw=%s). It's " + "probably really fast. Returning %s (bw=%s), the fastest " + "candidate we have.", + relay.nickname, + relay.consensus_bandwidth, + chosen.nickname, + chosen.consensus_bandwidth, + ) return chosen
def error_no_helper(relay, dest, our_nick=""): - reason = 'Unable to select a second relay' - log.debug(reason + ' to help measure %s (%s)', - relay.fingerprint, relay.nickname) + reason = "Unable to select a second relay" + log.debug( + reason + " to help measure %s (%s)", relay.fingerprint, relay.nickname + ) return [ - ResultErrorSecondRelay(relay, [], dest.url, our_nick, - msg=reason), - ] + ResultErrorSecondRelay(relay, [], dest.url, our_nick, msg=reason), + ]
def create_path_relay(relay, dest, rl, cb, relay_as_entry=True): @@ -272,7 +304,8 @@ def create_path_relay(relay, dest, rl, cb, relay_as_entry=True): # and False when the relay is not the entry, ie. is the exit (helper does # not have to be an exit) helper = _pick_ideal_second_hop( - relay, dest, rl, cb.controller, is_exit=relay_as_entry) + relay, dest, rl, cb.controller, is_exit=relay_as_entry + ) if not helper: return error_no_helper(relay, dest) if relay_as_entry: @@ -287,11 +320,14 @@ def create_path_relay(relay, dest, rl, cb, relay_as_entry=True):
def error_no_circuit(circ_fps, nicknames, reason, relay, dest, our_nick): - log.debug('Could not build circuit with path %s (%s): %s ', - circ_fps, nicknames, reason) + log.debug( + "Could not build circuit with path %s (%s): %s ", + circ_fps, + nicknames, + reason, + ) return [ - ResultErrorCircuit(relay, circ_fps, dest.url, our_nick, - msg=reason), + ResultErrorCircuit(relay, circ_fps, dest.url, our_nick, msg=reason), ]
@@ -303,10 +339,11 @@ def measure_relay(args, conf, destinations, cb, rl, relay): :return Result: a measurement Result object
""" - log.debug('Measuring %s %s', relay.nickname, relay.fingerprint) - our_nick = conf['scanner']['nickname'] + log.debug("Measuring %s %s", relay.nickname, relay.fingerprint) + our_nick = conf["scanner"]["nickname"] s = requests_utils.make_session( - cb.controller, conf.getfloat('general', 'http_timeout')) + cb.controller, conf.getfloat("general", "http_timeout") + ) # Probably because the scanner is stopping. if s is None: if settings.end_event.is_set(): @@ -314,12 +351,12 @@ def measure_relay(args, conf, destinations, cb, rl, relay): else: # In future refactor this should be returned from the make_session reason = "Unable to get proxies." - log.debug(reason + ' to measure %s %s', - relay.nickname, relay.fingerprint) + log.debug( + reason + " to measure %s %s", relay.nickname, relay.fingerprint + ) return [ - ResultError(relay, [], '', our_nick, - msg=reason), - ] + ResultError(relay, [], "", our_nick, msg=reason), + ] # Pick a destionation dest = destinations.next() # When there're no any functional destinations. @@ -332,13 +369,15 @@ def measure_relay(args, conf, destinations, cb, rl, relay): # It might be useful to store the fact that the destinations fail, # so store here the error, and set the signal once the error is stored # (in `resultump`). - log.critical("There are not any functional destinations.\n" - "It is recommended to set several destinations so that " - "the scanner can continue if one fails.") + log.critical( + "There are not any functional destinations.\n" + "It is recommended to set several destinations so that " + "the scanner can continue if one fails." + ) reason = "No functional destinations" # Resultdump will set end_event after storing the error return [ - ResultErrorDestination(relay, [], '', our_nick, msg=reason), + ResultErrorDestination(relay, [], "", our_nick, msg=reason), ]
# Pick a relay to help us measure the given relay. If the given relay is an @@ -364,24 +403,37 @@ def measure_relay(args, conf, destinations, cb, rl, relay): # Here we won't have the case that an exit tried to build the circuit as # entry and failed (#40029), cause not checking that it can exit all IPs. if not circ_id: - return error_no_circuit(circ_fps, nicknames, reason, relay, dest, - our_nick) - log.debug('Built circuit with path %s (%s) to measure %s (%s)', - circ_fps, nicknames, relay.fingerprint, relay.nickname) + return error_no_circuit( + circ_fps, nicknames, reason, relay, dest, our_nick + ) + log.debug( + "Built circuit with path %s (%s) to measure %s (%s)", + circ_fps, + nicknames, + relay.fingerprint, + relay.nickname, + ) # Make a connection to the destination is_usable, usable_data = connect_to_destination_over_circuit( - dest, circ_id, s, cb.controller, dest._max_dl) + dest, circ_id, s, cb.controller, dest._max_dl + )
# In the case that the relay was used as an exit, but could not exit # to the Web server, try again using it as entry, to avoid that it would # always fail when there's only one Web server. - if not is_usable and \ - relay.is_exit_not_bad_allowing_port(dest.port): + if not is_usable and relay.is_exit_not_bad_allowing_port(dest.port): log.debug( "Exit %s (%s) that can't exit all ips, with exit policy %s, failed" " to connect to %s via circuit %s (%s). Reason: %s. Trying again " - "with it as entry.", relay.fingerprint, relay.nickname, - exit_policy, dest.url, circ_fps, nicknames, usable_data) + "with it as entry.", + relay.fingerprint, + relay.nickname, + exit_policy, + dest.url, + circ_fps, + nicknames, + usable_data, + ) r = create_path_relay(relay, dest, rl, cb) if len(r) == 1: return r @@ -390,56 +442,99 @@ def measure_relay(args, conf, destinations, cb, rl, relay): if not circ_id: log.info( "Exit %s (%s) that can't exit all ips, failed to create " - " circuit as entry: %s (%s).", relay.fingerprint, - relay.nickname, circ_fps, nicknames) - return error_no_circuit(circ_fps, nicknames, reason, relay, dest, - our_nick) + " circuit as entry: %s (%s).", + relay.fingerprint, + relay.nickname, + circ_fps, + nicknames, + ) + return error_no_circuit( + circ_fps, nicknames, reason, relay, dest, our_nick + )
- log.debug('Built circuit with path %s (%s) to measure %s (%s)', - circ_fps, nicknames, relay.fingerprint, relay.nickname) + log.debug( + "Built circuit with path %s (%s) to measure %s (%s)", + circ_fps, + nicknames, + relay.fingerprint, + relay.nickname, + ) is_usable, usable_data = connect_to_destination_over_circuit( - dest, circ_id, s, cb.controller, dest._max_dl) + dest, circ_id, s, cb.controller, dest._max_dl + ) if not is_usable: - log.debug('Failed to connect to %s to measure %s (%s) via circuit ' - '%s (%s). Exit policy: %s. Reason: %s.', dest.url, - relay.fingerprint, relay.nickname, circ_fps, nicknames, - exit_policy, usable_data) + log.debug( + "Failed to connect to %s to measure %s (%s) via circuit " + "%s (%s). Exit policy: %s. Reason: %s.", + dest.url, + relay.fingerprint, + relay.nickname, + circ_fps, + nicknames, + exit_policy, + usable_data, + ) cb.close_circuit(circ_id) return [ - ResultErrorStream(relay, circ_fps, dest.url, our_nick, - msg=usable_data), + ResultErrorStream( + relay, circ_fps, dest.url, our_nick, msg=usable_data + ), ] assert is_usable - assert 'content_length' in usable_data + assert "content_length" in usable_data # FIRST: measure RTT - rtts, reason = measure_rtt_to_server(s, conf, dest, - usable_data['content_length']) + rtts, reason = measure_rtt_to_server( + s, conf, dest, usable_data["content_length"] + ) if rtts is None: - log.debug('Unable to measure RTT for %s (%s) to %s via circuit ' - '%s (%s): %s', relay.fingerprint, relay.nickname, - dest.url, circ_fps, nicknames, reason) + log.debug( + "Unable to measure RTT for %s (%s) to %s via circuit " + "%s (%s): %s", + relay.fingerprint, + relay.nickname, + dest.url, + circ_fps, + nicknames, + reason, + ) cb.close_circuit(circ_id) return [ - ResultErrorStream(relay, circ_fps, dest.url, our_nick, - msg=str(reason)), + ResultErrorStream( + relay, circ_fps, dest.url, our_nick, msg=str(reason) + ), ] # SECOND: measure bandwidth bw_results, reason = measure_bandwidth_to_server( - s, conf, dest, usable_data['content_length']) + s, conf, dest, usable_data["content_length"] + ) if bw_results is None: - log.debug('Failed to measure %s (%s) via circuit %s (%s) to %s. Exit' - ' policy: %s. Reason: %s.', relay.fingerprint, - relay.nickname, circ_fps, nicknames, dest.url, exit_policy, - reason) + log.debug( + "Failed to measure %s (%s) via circuit %s (%s) to %s. Exit" + " policy: %s. Reason: %s.", + relay.fingerprint, + relay.nickname, + circ_fps, + nicknames, + dest.url, + exit_policy, + reason, + ) cb.close_circuit(circ_id) return [ - ResultErrorStream(relay, circ_fps, dest.url, our_nick, - msg=str(reason)), + ResultErrorStream( + relay, circ_fps, dest.url, our_nick, msg=str(reason) + ), ] cb.close_circuit(circ_id) # Finally: store result - log.debug('Success measurement for %s (%s) via circuit %s (%s) to %s', - relay.fingerprint, relay.nickname, circ_fps, nicknames, dest.url) + log.debug( + "Success measurement for %s (%s) via circuit %s (%s) to %s", + relay.fingerprint, + relay.nickname, + circ_fps, + nicknames, + dest.url, + ) return [ ResultSuccess(rtts, bw_results, relay, circ_fps, dest.url, our_nick), ] @@ -467,35 +562,44 @@ def _should_keep_result(did_request_maximum, result_time, download_times): # should only allow ourselves to keep results that are between the min and # max allowed time msg = "Keeping measurement time {:.2f}".format(result_time) - if not did_request_maximum and \ - result_time >= download_times['min'] and \ - result_time < download_times['max']: + if ( + not did_request_maximum + and result_time >= download_times["min"] + and result_time < download_times["max"] + ): log.debug(msg) return True # If we did request the maximum amount, we should keep the result as long # as it took less than the maximum amount of time - if did_request_maximum and \ - result_time < download_times['max']: + if did_request_maximum and result_time < download_times["max"]: log.debug(msg) return True # In all other cases, return false - log.debug('Not keeping result time %f.%s', result_time, - '' if not did_request_maximum else ' We requested the maximum ' - 'amount allowed.') + log.debug( + "Not keeping result time %f.%s", + result_time, + "" + if not did_request_maximum + else " We requested the maximum " "amount allowed.", + ) return False
-def _next_expected_amount(expected_amount, result_time, download_times, - min_dl, max_dl): - if result_time < download_times['toofast']: +def _next_expected_amount( + expected_amount, result_time, download_times, min_dl, max_dl +): + if result_time < download_times["toofast"]: # Way too fast, greatly increase the amount we ask for expected_amount = int(expected_amount * 5) - elif result_time < download_times['min'] or \ - result_time >= download_times['max']: + elif ( + result_time < download_times["min"] + or result_time >= download_times["max"] + ): # As long as the result is between min/max, keep the expected amount # the same. Otherwise, adjust so we are aiming for the target amount. expected_amount = int( - expected_amount * download_times['target'] / result_time) + expected_amount * download_times["target"] / result_time + ) # Make sure we don't request too much or too little expected_amount = max(min_dl, expected_amount) expected_amount = min(max_dl, expected_amount) @@ -503,8 +607,8 @@ def _next_expected_amount(expected_amount, result_time, download_times,
def result_putter(result_dump): - ''' Create a function that takes a single argument -- the measurement - result -- and return that function so it can be used by someone else ''' + """Create a function that takes a single argument -- the measurement + result -- and return that function so it can be used by someone else"""
def closure(measurement_result): # Since result_dump thread is calling queue.get() every second, @@ -518,15 +622,17 @@ def result_putter(result_dump): "The queue with measurements is full, when adding %s.\n" "It is possible that the thread that get them to " "write them to the disk (ResultDump.enter) is stalled.", - measurement_result - ) + measurement_result, + ) + return closure
def result_putter_error(target): - ''' Create a function that takes a single argument -- an error from a + """Create a function that takes a single argument -- an error from a measurement -- and return that function so it can be used by someone else - ''' + """ + def closure(object): if settings.end_event.is_set(): return @@ -536,14 +642,28 @@ def result_putter_error(target): log.warning(FILLUP_TICKET_MSG) # To print the traceback that happened in the thread, not here in # the main process. - log.warning("".join(traceback.format_exception( - type(object), object, object.__traceback__)) + log.warning( + "".join( + traceback.format_exception( + type(object), object, object.__traceback__ + ) ) + ) + return closure
-def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump, - relay_prioritizer, destinations, pool): +def main_loop( + args, + conf, + controller, + relay_list, + circuit_builder, + result_dump, + relay_prioritizer, + destinations, + pool, +): """Starts and reuse the threads that measure the relays forever.
It starts a loop that will be run while there is not and event signaling @@ -579,7 +699,7 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump,
""" log.info("Started the main loop to measure the relays.") - hbeat = Heartbeat(conf.getpath('paths', 'state_fname')) + hbeat = Heartbeat(conf.getpath("paths", "state_fname"))
# Set the time to wait for a thread to finish as the half of an HTTP # request timeout. @@ -608,8 +728,18 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump, callback_err = result_putter_error(target) async_result = pool.apply_async( dispatch_worker_thread, - [args, conf, destinations, circuit_builder, relay_list, - target], {}, callback, callback_err) + [ + args, + conf, + destinations, + circuit_builder, + relay_list, + target, + ], + {}, + callback, + callback_err, + ) pending_results.append(async_result)
# Register this measurement to the heartbeat module @@ -629,10 +759,13 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump, loop_tdelta = (loop_tstop - loop_tstart) / 60 # At this point, we know the relays that were queued to be measured. # That does not mean they were actually measured. - log.debug("Attempted to measure %s relays in %s minutes", - num_relays, loop_tdelta) + log.debug( + "Attempted to measure %s relays in %s minutes", + num_relays, + loop_tdelta, + ) # In a testing network, exit after first loop - if controller.get_conf('TestingTorNetwork') == '1': + if controller.get_conf("TestingTorNetwork") == "1": log.info("In a testing network, exiting after the first loop.") # Threads should be closed nicely in some refactor stop_threads(signal.SIGTERM, None) @@ -679,8 +812,11 @@ def wait_for_results(num_relays_to_measure, pending_results): """ num_last_measured = 1 while num_last_measured > 0 and not settings.end_event.is_set(): - log.info("Pending measurements: %s out of %s: ", - len(pending_results), num_relays_to_measure) + log.info( + "Pending measurements: %s out of %s: ", + len(pending_results), + num_relays_to_measure, + ) time.sleep(TIMEOUT_MEASUREMENTS) old_pending_results = pending_results pending_results = [r for r in pending_results if not r.ready()] @@ -706,8 +842,7 @@ def force_get_results(pending_results): for r in pending_results: try: result = r.get(timeout=0.1) - log.warning("Result %s was not stored, it took too long.", - result) + log.warning("Result %s was not stored, it took too long.", result) # TimeoutError is raised when the result is not ready, ie. has not # been processed yet except TimeoutError: @@ -722,8 +857,11 @@ def force_get_results(pending_results): # not have the traceback. # Using `format_exception` instead of of `print_exception` to show # the traceback in all the log handlers. - log.warning("".join(traceback.format_exception( - type(e), e, e.__traceback__))) + log.warning( + "".join( + traceback.format_exception(type(e), e, e.__traceback__) + ) + )
def run_speedtest(args, conf): @@ -749,27 +887,29 @@ def run_speedtest(args, conf):
# When there will be a refactor where conf is global, this can be removed # from here. - state = State(conf.getpath('paths', 'state_fname')) + state = State(conf.getpath("paths", "state_fname")) # XXX: tech-debt: create new function to obtain the controller and to # write the state, so that a unit test to check the state tor version can # be created # Store tor version whenever the scanner starts. - state['tor_version'] = str(controller.get_version()) + state["tor_version"] = str(controller.get_version()) # Call only once to initialize http_headers - settings.init_http_headers(conf.get('scanner', 'nickname'), state['uuid'], - state['tor_version']) + settings.init_http_headers( + conf.get("scanner", "nickname"), state["uuid"], state["tor_version"] + ) # To do not have to pass args and conf to RelayList, pass an extra # argument with the data_period - measurements_period = conf.getint('general', 'data_period') + measurements_period = conf.getint("general", "data_period") rl = RelayList(args, conf, controller, measurements_period, state) cb = CB(args, conf, controller, rl) rd = ResultDump(args, conf) rp = RelayPrioritizer(args, conf, rl, rd) destinations, error_msg = DestinationList.from_config( - conf, cb, rl, controller) + conf, cb, rl, controller + ) if not destinations: fail_hard(error_msg) - max_pending_results = conf.getint('scanner', 'measurement_threads') + max_pending_results = conf.getint("scanner", "measurement_threads") pool = Pool(max_pending_results) try: main_loop(args, conf, controller, rl, cb, rd, rp, destinations, pool) @@ -785,30 +925,36 @@ def run_speedtest(args, conf):
def gen_parser(sub): - d = 'The scanner side of sbws. This should be run on a well-connected '\ - 'machine on the Internet with a healthy amount of spare bandwidth. '\ - 'This continuously builds circuits, measures relays, and dumps '\ - 'results into a datadir, commonly found in ~/.sbws' - sub.add_parser('scanner', formatter_class=ArgumentDefaultsHelpFormatter, - description=d) + d = ( + "The scanner side of sbws. This should be run on a well-connected " + "machine on the Internet with a healthy amount of spare bandwidth. " + "This continuously builds circuits, measures relays, and dumps " + "results into a datadir, commonly found in ~/.sbws" + ) + sub.add_parser( + "scanner", formatter_class=ArgumentDefaultsHelpFormatter, description=d + )
def main(args, conf): - if conf.getint('scanner', 'measurement_threads') < 1: - fail_hard('Number of measurement threads must be larger than 1') + if conf.getint("scanner", "measurement_threads") < 1: + fail_hard("Number of measurement threads must be larger than 1")
- min_dl = conf.getint('scanner', 'min_download_size') - max_dl = conf.getint('scanner', 'max_download_size') + min_dl = conf.getint("scanner", "min_download_size") + max_dl = conf.getint("scanner", "max_download_size") if max_dl < min_dl: - fail_hard('Max download size %d cannot be smaller than min %d', - max_dl, min_dl) + fail_hard( + "Max download size %d cannot be smaller than min %d", + max_dl, + min_dl, + )
- os.makedirs(conf.getpath('paths', 'datadir'), exist_ok=True) + os.makedirs(conf.getpath("paths", "datadir"), exist_ok=True)
- state = State(conf.getpath('paths', 'state_fname')) - state['scanner_started'] = now_isodt_str() + state = State(conf.getpath("paths", "state_fname")) + state["scanner_started"] = now_isodt_str() # Generate an unique identifier for each scanner - if 'uuid' not in state: - state['uuid'] = str(uuid.uuid4()) + if "uuid" not in state: + state["uuid"] = str(uuid.uuid4())
run_speedtest(args, conf) diff --git a/sbws/core/stats.py b/sbws/core/stats.py index f865b92..9f80885 100644 --- a/sbws/core/stats.py +++ b/sbws/core/stats.py @@ -16,23 +16,29 @@ log = logging.getLogger(__name__)
def _print_stats_error_types(data): - counts = {'total': 0} + counts = {"total": 0} for fp in data: results = data[fp] for result in results: if result.type not in counts: - log.debug('Found a %s for the first time', result.type) + log.debug("Found a %s for the first time", result.type) counts[result.type] = 0 counts[result.type] += 1 - counts['total'] += 1 + counts["total"] += 1 for count_type in counts: - if count_type == 'total': + if count_type == "total": continue - if 'error' not in count_type: + if "error" not in count_type: continue number = counts[count_type] - print('{}/{} ({:.2f}%) results were {}'.format( - number, counts['total'], 100*number/counts['total'], count_type)) + print( + "{}/{} ({:.2f}%) results were {}".format( + number, + counts["total"], + 100 * number / counts["total"], + count_type, + ) + )
def _result_type_per_relay(data, result_type): @@ -43,10 +49,10 @@ def _result_type_per_relay(data, result_type):
def _get_box_plot_values(iterable): - ''' Reutrn the min, q1, med, q1, and max of the input list or iterable. + """Reutrn the min, q1, med, q1, and max of the input list or iterable. This function is NOT perfect, and I think that's fine for basic statistical needs. Instead of median, it will return low or high median. Same for q1 - and q3. ''' + and q3.""" if not isinstance(iterable, list): iterable = list(iterable) iterable.sort() @@ -54,23 +60,35 @@ def _get_box_plot_values(iterable): median_idx = round(length / 2) q1_idx = round(length / 4) q3_idx = median_idx + q1_idx - return [iterable[0], iterable[q1_idx], iterable[median_idx], - iterable[q3_idx], iterable[length-1]] + return [ + iterable[0], + iterable[q1_idx], + iterable[median_idx], + iterable[q3_idx], + iterable[length - 1], + ]
def _print_results_type_box_plot(data, result_type): per_relay = _result_type_per_relay(data, result_type) bp = _get_box_plot_values(per_relay.values()) - print('For {}: min={} q1={} med={} q3={} max={}'.format( - result_type.__name__, *bp)) + print( + "For {}: min={} q1={} med={} q3={} max={}".format( + result_type.__name__, *bp + ) + )
def _print_averages(data): - mean_success = mean([ - len([r for r in data[fp] if isinstance(r, ResultSuccess)]) - for fp in data]) - print('Mean {:.2f} successful measurements per ' - 'relay'.format(mean_success)) + mean_success = mean( + [ + len([r for r in data[fp] if isinstance(r, ResultSuccess)]) + for fp in data + ] + ) + print( + "Mean {:.2f} successful measurements per " "relay".format(mean_success) + ) _print_results_type_box_plot(data, Result) _print_results_type_box_plot(data, ResultSuccess) _print_results_type_box_plot(data, ResultErrorCircuit) @@ -78,31 +96,31 @@ def _print_averages(data):
def _results_into_bandwidths(results, limit=5): - ''' + """ For all the given resutls, extract their download statistics and normalize them into bytes/second bandwidths.
:param list results: list of :class:`sbws.list.resultdump.ResultSuccess` :param int limit: The maximum number of bandwidths to return :returns: list of up to `limit` bandwidths, with the largest first - ''' + """ downloads = [] for result in results: assert isinstance(result, ResultSuccess) for dl in result.downloads: - downloads.append(dl['amount'] / dl['duration']) + downloads.append(dl["amount"] / dl["duration"]) return sorted(downloads, reverse=True)[:limit]
def print_stats(args, data): - ''' + """ Called from main to print various statistics about the organized **data** to stdout.
:param argparse.Namespace args: command line arguments :param dict data: keyed by relay fingerprint, and with values of :class:`sbws.lib.resultdump.Result` subclasses - ''' + """ results = [] for fp in data: results.extend(data[fp]) @@ -111,8 +129,9 @@ def print_stats(args, data): success_results = [r for r in results if isinstance(r, ResultSuccess)] percent_success_results = 100 * len(success_results) / len(results) fastest_transfers = _results_into_bandwidths(success_results) - fastest_transfer = 0 if len(fastest_transfers) < 1 else \ - fastest_transfers[0] + fastest_transfer = ( + 0 if len(fastest_transfers) < 1 else fastest_transfers[0] + ) first_time = min([r.time for r in results]) last_time = max([r.time for r in results]) first = datetime.utcfromtimestamp(first_time) @@ -120,50 +139,63 @@ def print_stats(args, data): last = datetime.utcfromtimestamp(last_time) last = last - timedelta(microseconds=last.microsecond) duration = last - first - print(len(data), 'relays have recent results') + print(len(data), "relays have recent results") _print_averages(data) - print(len(results), 'total results, and {:.1f}% are successes'.format( - percent_success_results)) - print(len(success_results), 'success results and', - len(error_results), 'error results') - print('The fastest download was {:.2f} KiB/s'.format( - fastest_transfer/1024)) - print('Results come from', first, 'to', last, 'over a period of', - duration) - if getattr(args, 'error_types', False) is True: + print( + len(results), + "total results, and {:.1f}% are successes".format( + percent_success_results + ), + ) + print( + len(success_results), + "success results and", + len(error_results), + "error results", + ) + print( + "The fastest download was {:.2f} KiB/s".format(fastest_transfer / 1024) + ) + print("Results come from", first, "to", last, "over a period of", duration) + if getattr(args, "error_types", False) is True: _print_stats_error_types(data)
def gen_parser(sub): - ''' + """ Helper function for the broader argument parser generating code that adds in all the possible command line arguments for the stats command.
:param argparse._SubParsersAction sub: what to add a sub-parser to - ''' - d = 'Write some statistics about the data collected so far to stdout' - p = sub.add_parser('stats', formatter_class=ArgumentDefaultsHelpFormatter, - description=d) - p.add_argument('--error-types', action='store_true', - help='Also print information about each error type') + """ + d = "Write some statistics about the data collected so far to stdout" + p = sub.add_parser( + "stats", formatter_class=ArgumentDefaultsHelpFormatter, description=d + ) + p.add_argument( + "--error-types", + action="store_true", + help="Also print information about each error type", + )
def main(args, conf): - ''' + """ Main entry point into the stats command.
:param argparse.Namespace args: command line arguments :param configparser.ConfigParser conf: parsed config files - ''' + """
- datadir = conf.getpath('paths', 'datadir') + datadir = conf.getpath("paths", "datadir") if not os.path.isdir(datadir): - fail_hard('%s does not exist', datadir) + fail_hard("%s does not exist", datadir)
- fresh_days = conf.getint('general', 'data_period') + fresh_days = conf.getint("general", "data_period") results = load_recent_results_in_datadir( - fresh_days, datadir, success_only=False) + fresh_days, datadir, success_only=False + ) if len(results) < 1: - log.warning('No fresh results') + log.warning("No fresh results") return print_stats(args, results) diff --git a/sbws/globals.py b/sbws/globals.py index b7b1097..cb6a4c3 100644 --- a/sbws/globals.py +++ b/sbws/globals.py @@ -15,7 +15,7 @@ log = logging.getLogger(__name__)
RESULT_VERSION = 4 WIRE_VERSION = 1 -SPEC_VERSION = '1.5.0' +SPEC_VERSION = "1.5.0"
# This is a dictionary of torrc options we always want to set when launching # Tor and that do not depend on any runtime configuration @@ -24,52 +24,54 @@ SPEC_VERSION = '1.5.0' TORRC_STARTING_POINT = { # We will find out via the ControlPort and not setting something static # means a lower chance of conflict - 'SocksPort': 'auto', + "SocksPort": "auto", # Easier than password authentication - 'CookieAuthentication': '1', + "CookieAuthentication": "1", # To avoid path bias warnings - 'UseEntryGuards': '0', + "UseEntryGuards": "0", # Because we need things from full server descriptors (namely for now: the # bandwidth line) - 'UseMicrodescriptors': '0', + "UseMicrodescriptors": "0", # useful logging options for clients that don't care about anonymity - 'SafeLogging': '0', - 'LogTimeGranularity': '1', - 'ProtocolWarnings': '1', + "SafeLogging": "0", + "LogTimeGranularity": "1", + "ProtocolWarnings": "1", # To be able to responde to MaxAdvertisedBandwidth as soon as possible. # If ``FetchDirInfoExtraEarly` is set, but not # `FetchDirInfoEarly`, Tor will throw this error: # `FetchDirInfoExtraEarly requires that you also set FetchDirInfoEarly` - 'FetchDirInfoEarly': '1', - 'FetchDirInfoExtraEarly': '1', + "FetchDirInfoEarly": "1", + "FetchDirInfoExtraEarly": "1", # To make Tor keep fetching descriptors, even when idle. - 'FetchUselessDescriptors': '1', + "FetchUselessDescriptors": "1", # Things needed to make circuits fail a little faster. We get the # circuit_timeout as a string instead of an int on purpose: stem only # accepts strings. - 'LearnCircuitBuildTimeout': '0', + "LearnCircuitBuildTimeout": "0", } # Options that need to be set at runtime. TORRC_RUNTIME_OPTIONS = { # The scanner builds the circuits to download the data itself, # so do not let Tor to build them. - '__DisablePredictedCircuits': '1', + "__DisablePredictedCircuits": "1", # The scanner attach the streams to the circuit itself, # so do not let Tor to attache them. - '__LeaveStreamsUnattached': '1', + "__LeaveStreamsUnattached": "1", } # Options that can be set at runtime and can fail with some Tor versions # The ones that fail will be ignored.. -TORRC_OPTIONS_CAN_FAIL = OrderedDict({ - # Since currently scanner anonymity is not the goal, ConnectionPadding - # is disable to do not send extra traffic - 'ConnectionPadding': '0' - }) +TORRC_OPTIONS_CAN_FAIL = OrderedDict( + { + # Since currently scanner anonymity is not the goal, ConnectionPadding + # is disable to do not send extra traffic + "ConnectionPadding": "0" + } +)
PKG_DIR = os.path.abspath(os.path.dirname(__file__)) -DEFAULT_CONFIG_PATH = os.path.join(PKG_DIR, 'config.default.ini') -DEFAULT_LOG_CONFIG_PATH = os.path.join(PKG_DIR, 'config.log.default.ini') -USER_CONFIG_PATH = os.path.join(os.path.expanduser('~'), '.sbws.ini') +DEFAULT_CONFIG_PATH = os.path.join(PKG_DIR, "config.default.ini") +DEFAULT_LOG_CONFIG_PATH = os.path.join(PKG_DIR, "config.log.default.ini") +USER_CONFIG_PATH = os.path.join(os.path.expanduser("~"), ".sbws.ini") SUPERVISED_USER_CONFIG_PATH = "/etc/sbws/sbws.ini" SUPERVISED_RUN_DPATH = "/run/sbws/tor"
@@ -126,25 +128,28 @@ GENERATE_PERIOD = 28 * 24 * 60 * 60 # In Requests these keys are case insensitive. HTTP_HEADERS = { # This would be ignored if changing to HTTP/2 - 'Connection': 'keep-alive', + "Connection": "keep-alive", # Needs to get Tor version from the controller - 'User-Agent': 'sbws/{} ({}) Python/{} Requests/{} Stem/{} Tor/'.format( - __version__, platform.platform(), - platform.python_version(), - requests_version, stem_version), + "User-Agent": "sbws/{} ({}) Python/{} Requests/{} Stem/{} Tor/".format( + __version__, + platform.platform(), + platform.python_version(), + requests_version, + stem_version, + ), # Organization defined names (:rfc:`7239`) # Needs to get the nickname from the user config file. - 'Tor-Bandwidth-Scanner-Nickname': '{}', - 'Tor-Bandwidth-Scanner-UUID': '{}', + "Tor-Bandwidth-Scanner-Nickname": "{}", + "Tor-Bandwidth-Scanner-UUID": "{}", # In case of including IP address. # 'Forwarded': 'for={}' # IPv6 part, if there's - } +} # In the case of having ipv6 it's concatenated to forwarder. IPV6_FORWARDED = ', for="[{}]"'
HTTP_GET_HEADERS = { - 'Range': '{}', - 'Accept-Encoding': 'identity', + "Range": "{}", + "Accept-Encoding": "identity", } DESTINATION_VERIFY_CERTIFICATE = True # This number might need adjusted depending on the percentage of circuits and @@ -197,13 +202,13 @@ RELAY_TYPES = [G, M, E, GE]
def fail_hard(*a, **kw): - ''' Log something ... and then exit as fast as possible ''' + """ Log something ... and then exit as fast as possible """ log.critical(*a, **kw) exit(1)
def touch_file(fname, times=None): - ''' + """ If **fname** exists, update its last access and modified times to now. If **fname** does not exist, create it. If **times** are specified, pass them to os.utime for use. @@ -211,7 +216,7 @@ def touch_file(fname, times=None): :param str fname: Name of file to update or create :param tuple times: 2-tuple of floats for access time and modified time respectively - ''' - log.debug('Touching %s', fname) - with open(fname, 'a') as fd: + """ + log.debug("Touching %s", fname) + with open(fname, "a") as fd: os.utime(fd.fileno(), times=times) diff --git a/sbws/lib/bwfile_health.py b/sbws/lib/bwfile_health.py index 95d37ee..1c060ab 100644 --- a/sbws/lib/bwfile_health.py +++ b/sbws/lib/bwfile_health.py @@ -14,7 +14,9 @@ from sbws.globals import ( ) from sbws.lib.v3bwfile import HEADER_INT_KEYS, BWLINE_KEYS_V1_4
-logging.basicConfig(level=logging.INFO,) +logging.basicConfig( + level=logging.INFO, +) logger = logging.getLogger(__name__)
diff --git a/sbws/lib/circuitbuilder.py b/sbws/lib/circuitbuilder.py index 81742f1..d96e5ec 100644 --- a/sbws/lib/circuitbuilder.py +++ b/sbws/lib/circuitbuilder.py @@ -10,7 +10,7 @@ def valid_circuit_length(path):
class CircuitBuilder: - ''' The CircuitBuilder interface. + """The CircuitBuilder interface.
Subclasses must implement their own build_circuit() function. Subclasses may keep additional state if they'd find it helpful. @@ -22,14 +22,21 @@ class CircuitBuilder: It might be good practice to close circuits as you find you no longer need them, but CircuitBuilder will keep track of existing circuits and close them when it is deleted. - ''' + """ + # XXX: In new major version, remove args and conf, they are not used. - def __init__(self, args, conf, controller, relay_list=None, - close_circuits_on_exit=True): + def __init__( + self, + args, + conf, + controller, + relay_list=None, + close_circuits_on_exit=True, + ): self.controller = controller self.built_circuits = set() self.close_circuits_on_exit = close_circuits_on_exit - self.circuit_timeout = conf.getint('general', 'circuit_timeout') + self.circuit_timeout = conf.getint("general", "circuit_timeout")
def close_circuit(self, circ_id): try: @@ -48,13 +55,17 @@ class CircuitBuilder: return None, "Can not build a circuit, invalid path." c = self.controller timeout = self.circuit_timeout - fp_path = '[' + ' -> '.join([p for p in path]) + ']' - log.debug('Building %s', fp_path) + fp_path = "[" + " -> ".join([p for p in path]) + "]" + log.debug("Building %s", fp_path) try: - circ_id = c.new_circuit( - path, await_build=True, timeout=timeout) - except (InvalidRequest, CircuitExtensionFailed, - ProtocolError, Timeout, SocketClosed) as e: + circ_id = c.new_circuit(path, await_build=True, timeout=timeout) + except ( + InvalidRequest, + CircuitExtensionFailed, + ProtocolError, + Timeout, + SocketClosed, + ) as e: return None, str(e) return circ_id, None
@@ -70,8 +81,9 @@ class CircuitBuilder: except (InvalidArguments, InvalidRequest): pass except (ControllerError, InvalidArguments) as e: - log.exception("Exception trying to get circuit to delete: %s", - e) + log.exception( + "Exception trying to get circuit to delete: %s", e + ) self.built_circuits.clear()
@@ -80,6 +92,7 @@ class CircuitBuilder: # does. class GapsCircuitBuilder(CircuitBuilder): """Same as ``CircuitBuilder`` but implements build_circuit.""" + def __init__(self, *a, **kw): super().__init__(*a, **kw)
diff --git a/sbws/lib/destination.py b/sbws/lib/destination.py index 84473be..a2bc855 100644 --- a/sbws/lib/destination.py +++ b/sbws/lib/destination.py @@ -13,8 +13,8 @@ from ..globals import ( DELTA_SECONDS_RETRY_DESTINATION, MAX_SECONDS_RETRY_DESTINATION, NUM_DESTINATION_ATTEMPTS_STORED, - FACTOR_INCREMENT_DESTINATION_RETRY - ) + FACTOR_INCREMENT_DESTINATION_RETRY, +) from sbws import settings
@@ -24,46 +24,47 @@ log = logging.getLogger(__name__) # Duplicate some code from DestinationList.from_config, # it should be refactored. def parse_destinations_countries(conf): - """Returns the destinations' country as string separated by comma. - - """ + """Returns the destinations' country as string separated by comma.""" destinations_countries = [] - for key in conf['destinations'].keys(): + for key in conf["destinations"].keys(): # Not a destination key - if key in ['usability_test_interval']: + if key in ["usability_test_interval"]: continue # The destination is not enabled - if not conf['destinations'].getboolean(key): + if not conf["destinations"].getboolean(key): continue - destination_section = 'destinations.{}'.format(key) - destination_country = conf[destination_section].get('country', None) + destination_section = "destinations.{}".format(key) + destination_country = conf[destination_section].get("country", None) destinations_countries.append(destination_country) - return ','.join(destinations_countries) + return ",".join(destinations_countries)
def _parse_verify_option(conf_section): - if 'verify' not in conf_section: + if "verify" not in conf_section: return DESTINATION_VERIFY_CERTIFICATE try: - verify = conf_section.getboolean('verify') + verify = conf_section.getboolean("verify") except ValueError: log.warning( - 'Currently sbws only supports verify=true/false, not a CA bundle ' - 'file. We think %s is not a bool, and thus must be a CA bundle ' - 'file. This is supposed to be allowed by the Python Requests ' - 'library, but pastly couldn't get it to work in his afternoon ' - 'of testing. So we will allow this, but expect Requests to throw ' - 'SSLError exceptions later. Have fun!', conf_section['verify']) - return conf_section['verify'] + "Currently sbws only supports verify=true/false, not a CA bundle " + "file. We think %s is not a bool, and thus must be a CA bundle " + "file. This is supposed to be allowed by the Python Requests " + "library, but pastly couldn't get it to work in his afternoon " + "of testing. So we will allow this, but expect Requests to throw " + "SSLError exceptions later. Have fun!", + conf_section["verify"], + ) + return conf_section["verify"] if not verify: # disable urllib3 warning: InsecureRequestWarning import urllib3 + urllib3.disable_warnings() return verify
def connect_to_destination_over_circuit(dest, circ_id, session, cont, max_dl): - ''' + """ Connect to **dest* over the given **circ_id** using the given Requests **session**. Make sure the destination seems usable. Return True and a dictionary of helpful information if we connected and the destination is @@ -98,13 +99,13 @@ def connect_to_destination_over_circuit(dest, circ_id, session, cont, max_dl): :param cont Controller: them Stem library controller controlling Tor :returns: True and a dictionary if everything is in order and measurements should commence. False and an error string otherwise. - ''' + """ assert isinstance(dest, Destination) log.debug("Connecting to destination over circuit.") # Do not start if sbws is stopping if settings.end_event.is_set(): return False, "Shutting down." - error_prefix = 'When sending HTTP HEAD to {}, '.format(dest.url) + error_prefix = "When sending HTTP HEAD to {}, ".format(dest.url) with stem_utils.stream_building_lock: listener = stem_utils.attach_stream_to_circuit_listener(cont, circ_id) stem_utils.add_event_listener(cont, listener, EventType.STREAM) @@ -112,41 +113,56 @@ def connect_to_destination_over_circuit(dest, circ_id, session, cont, max_dl): head = session.head(dest.url, verify=dest.verify) except requests.exceptions.RequestException as e: dest.add_failure() - return False, 'Could not connect to {} over circ {} {}: {}'.format( - dest.url, circ_id, stem_utils.circuit_str(cont, circ_id), e) + return False, "Could not connect to {} over circ {} {}: {}".format( + dest.url, circ_id, stem_utils.circuit_str(cont, circ_id), e + ) finally: stem_utils.remove_event_listener(cont, listener) if head.status_code != requests.codes.ok: dest.add_failure() - return False, error_prefix + 'we expected HTTP code '\ - '{} not {}'.format(requests.codes.ok, head.status_code) - if 'content-length' not in head.headers: + return ( + False, + error_prefix + "we expected HTTP code " + "{} not {}".format(requests.codes.ok, head.status_code), + ) + if "content-length" not in head.headers: dest.add_failure() - return False, error_prefix + 'we expect the header Content-Length '\ - 'to exist in the response' - content_length = int(head.headers['content-length']) + return ( + False, + error_prefix + "we expect the header Content-Length " + "to exist in the response", + ) + content_length = int(head.headers["content-length"]) if max_dl > content_length: dest.add_failure() - return False, error_prefix + 'our maximum configured download size '\ - 'is {} but the content is only {}'.format(max_dl, content_length) - log.debug('Connected to %s over circuit %s', dest.url, circ_id) + return ( + False, + error_prefix + "our maximum configured download size " + "is {} but the content is only {}".format(max_dl, content_length), + ) + log.debug("Connected to %s over circuit %s", dest.url, circ_id) # Any failure connecting to the destination will call add_failure, # It can not be set at the start, to be able to know whether it is # failing consecutive times. dest.add_success() - return True, {'content_length': content_length} + return True, {"content_length": content_length}
class Destination: - """Web server from which data is downloaded to measure bandwidth. - """ + """Web server from which data is downloaded to measure bandwidth.""" + # NOTE: max_dl and verify should be optional and have defaults - def __init__(self, url, max_dl, verify, - max_num_failures=MAX_NUM_DESTINATION_FAILURES, - delta_seconds_retry=DELTA_SECONDS_RETRY_DESTINATION, - max_seconds_between_retries=MAX_SECONDS_RETRY_DESTINATION, - num_attempts_stored=NUM_DESTINATION_ATTEMPTS_STORED, - factor_increment_retry=FACTOR_INCREMENT_DESTINATION_RETRY): + def __init__( + self, + url, + max_dl, + verify, + max_num_failures=MAX_NUM_DESTINATION_FAILURES, + delta_seconds_retry=DELTA_SECONDS_RETRY_DESTINATION, + max_seconds_between_retries=MAX_SECONDS_RETRY_DESTINATION, + num_attempts_stored=NUM_DESTINATION_ATTEMPTS_STORED, + factor_increment_retry=FACTOR_INCREMENT_DESTINATION_RETRY, + ): """Initalizes the Web server from which the data is downloaded.
:param str url: Web server data URL to download. @@ -178,8 +194,12 @@ class Destination: # Store tuples of timestamp and whether the destination succed or not # (succed, 1, failed, 0). # Initialize it as if it never failed. - self._attempts = collections.deque([(datetime.datetime.utcnow(), 1), ], - maxlen=self._num_attempts_stored) + self._attempts = collections.deque( + [ + (datetime.datetime.utcnow(), 1), + ], + maxlen=self._num_attempts_stored, + ) self._factor = factor_increment_retry
def _last_attempts(self, n=None): @@ -187,8 +207,9 @@ class Destination: # deque does not accept slices, # a new deque is returned with the last n items # (or less if there were less). - return collections.deque(self._attempts, - maxlen=(n or self._max_num_failures)) + return collections.deque( + self._attempts, maxlen=(n or self._max_num_failures) + )
def _are_last_attempts_failures(self, n=None): """ @@ -197,8 +218,9 @@ class Destination: """ # Count the number that there was a failure when used n = n if n else self._max_num_failures - return ([i[1] for i in self._last_attempts(n)].count(0) - >= self._max_num_failures) + return [i[1] for i in self._last_attempts(n)].count( + 0 + ) >= self._max_num_failures
def _increment_time_to_retry(self, factor=None): """ @@ -207,12 +229,18 @@ class Destination: self._delta_seconds_retry *= factor or self._factor if self._delta_seconds_retry > self._max_seconds_between_retries: self._delta_seconds_retry = self._max_seconds_between_retries - log.debug("Incremented the time to try destination %s past the " - "limit, capping it at %s hours.", - self.url, self._delta_seconds_retry / 60 / 60) + log.debug( + "Incremented the time to try destination %s past the " + "limit, capping it at %s hours.", + self.url, + self._delta_seconds_retry / 60 / 60, + ) else: - log.debug("Incremented the time to try destination %s to %s " - "hours.", self.url, self._delta_seconds_retry / 60 / 60) + log.debug( + "Incremented the time to try destination %s to %s " "hours.", + self.url, + self._delta_seconds_retry / 60 / 60, + )
def _get_last_try_in_seconds_ago(self): """ @@ -227,8 +255,7 @@ class Destination: Return True if the last time it was used it was ``n`` seconds ago. """ # If the last attempt is older than _delta_seconds_retry, try again - return (self._get_last_try_in_seconds_ago() > - self._delta_seconds_retry) + return self._get_last_try_in_seconds_ago() > self._delta_seconds_retry
def is_functional(self): """Whether connections to a destination are failing or not. @@ -252,27 +279,35 @@ class Destination: if self._are_last_attempts_failures(): # The log here will appear in all the the queued relays and # threads. - log.debug("The last %s times the destination %s failed. " - "It last ran %s seconds ago. " - "Disabled for %s seconds.", - self._max_num_failures, self.url, - self._get_last_try_in_seconds_ago(), - self._delta_seconds_retry) - log.warning("The last %s times a destination failed. " - "It last ran %s seconds ago. " - "Disabled for %s seconds." - "Please, add more destinations or increment the " - "number of maximum number of consecutive failures " - "in the configuration.", - self._max_num_failures, - self._get_last_try_in_seconds_ago(), - self._delta_seconds_retry) + log.debug( + "The last %s times the destination %s failed. " + "It last ran %s seconds ago. " + "Disabled for %s seconds.", + self._max_num_failures, + self.url, + self._get_last_try_in_seconds_ago(), + self._delta_seconds_retry, + ) + log.warning( + "The last %s times a destination failed. " + "It last ran %s seconds ago. " + "Disabled for %s seconds." + "Please, add more destinations or increment the " + "number of maximum number of consecutive failures " + "in the configuration.", + self._max_num_failures, + self._get_last_try_in_seconds_ago(), + self._delta_seconds_retry, + ) # It was not used for a while and the last time it was used # was long ago, then try again if self._is_last_try_old_enough(): - log.debug("The destination %s was not tried for %s seconds, " - "it is going to by tried again.", self.url, - self._get_last_try_in_seconds_ago()) + log.debug( + "The destination %s was not tried for %s seconds, " + "it is going to by tried again.", + self.url, + self._get_last_try_in_seconds_ago(), + ) # Set the next time to retry higher, in case this attempt fails self._increment_time_to_retry() return True @@ -305,26 +340,28 @@ class Destination: p = self._url.port scheme = self._url.scheme if p is None: - if scheme == 'http': + if scheme == "http": p = 80 - elif scheme == 'https': + elif scheme == "https": p = 443 else: - assert None, 'Unreachable. Unknown scheme {}'.format(scheme) + assert None, "Unreachable. Unknown scheme {}".format(scheme) assert p is not None return p
@staticmethod def from_config(conf_section, max_dl, number_threads): - assert 'url' in conf_section - url = conf_section['url'] + assert "url" in conf_section + url = conf_section["url"] verify = _parse_verify_option(conf_section) try: # Because one a destination fails, all the threads that are using # it at that moment will fail too, multiply by the number of # threads. - max_num_failures = (conf_section.getint('max_num_failures') - or MAX_NUM_DESTINATION_FAILURES) + max_num_failures = ( + conf_section.getint("max_num_failures") + or MAX_NUM_DESTINATION_FAILURES + ) except ValueError: # If the operator did not setup the number, set to the default. max_num_failures = MAX_NUM_DESTINATION_FAILURES @@ -350,36 +387,45 @@ class DestinationList:
@staticmethod def from_config(conf, circuit_builder, relay_list, controller): - assert 'destinations' in conf - section = conf['destinations'] + assert "destinations" in conf + section = conf["destinations"] dests = [] for key in section.keys(): - if key in ['usability_test_interval']: + if key in ["usability_test_interval"]: continue if not section.getboolean(key): - log.debug('%s is disabled; not loading it', key) + log.debug("%s is disabled; not loading it", key) continue - dest_sec = 'destinations.{}'.format(key) + dest_sec = "destinations.{}".format(key) assert dest_sec in conf # validate_config should require this - log.debug('Loading info for destination %s', key) - dests.append(Destination.from_config( - conf[dest_sec], - # Multiply by the number of threads since all the threads will - # fail at the same time. - conf.getint('scanner', 'max_download_size'), - conf.getint('scanner', 'measurement_threads'))) + log.debug("Loading info for destination %s", key) + dests.append( + Destination.from_config( + conf[dest_sec], + # Multiply by the number of threads since all the threads will + # fail at the same time. + conf.getint("scanner", "max_download_size"), + conf.getint("scanner", "measurement_threads"), + ) + ) if len(dests) < 1: - msg = 'No enabled destinations in config. Please see '\ - 'docs/source/man_sbws.ini.rst" or "man 5 sbws.ini" ' \ - 'for help adding and enabling destinations.' + msg = ( + "No enabled destinations in config. Please see " + 'docs/source/man_sbws.ini.rst" or "man 5 sbws.ini" ' + "for help adding and enabling destinations." + ) return None, msg - return DestinationList(conf, dests, circuit_builder, relay_list, - controller), '' + return ( + DestinationList( + conf, dests, circuit_builder, relay_list, controller + ), + "", + )
def next(self): - ''' + """ Returns the next destination that should be used in a measurement - ''' + """ # Do not perform usability tests since a destination is already proven # usable or not in every measurement, and it should depend on a X # number of failures. diff --git a/sbws/lib/heartbeat.py b/sbws/lib/heartbeat.py index 1236890..89d6198 100644 --- a/sbws/lib/heartbeat.py +++ b/sbws/lib/heartbeat.py @@ -45,25 +45,28 @@ class Heartbeat(object): Log the percentage, the number of relays measured and not measured, the number of loops and the time elapsed since it started measuring. """ - loops_count = self.state_dict.count('recent_priority_list') + loops_count = self.state_dict.count("recent_priority_list")
not_measured_fp_set = self.consensus_fp_set.difference( self.measured_fp_set - ) + ) main_loop_tdelta = (time.monotonic() - self.main_loop_tstart) / 60 new_measured_percent = round( len(self.measured_fp_set) / len(self.consensus_fp_set) * 100 - ) + )
log.info("Run %s main loops.", loops_count) - log.info("Measured in total %s (%s%%) unique relays in %s minutes", - len(self.measured_fp_set), new_measured_percent, - main_loop_tdelta) + log.info( + "Measured in total %s (%s%%) unique relays in %s minutes", + len(self.measured_fp_set), + new_measured_percent, + main_loop_tdelta, + ) log.info("%s relays still not measured.", len(not_measured_fp_set))
# The case when it is equal will only happen when all the relays # have been measured. - if (new_measured_percent <= self.previous_measurement_percent): + if new_measured_percent <= self.previous_measurement_percent: log.warning("There is no progress measuring new unique relays.")
self.previous_measurement_percent = new_measured_percent diff --git a/sbws/lib/relaylist.py b/sbws/lib/relaylist.py index 2a0bd8d..06de489 100644 --- a/sbws/lib/relaylist.py +++ b/sbws/lib/relaylist.py @@ -12,7 +12,7 @@ from ..globals import ( MAX_RECENT_CONSENSUS_COUNT, MAX_RECENT_PRIORITY_RELAY_COUNT, MAX_RECENT_PRIORITY_LIST_COUNT, - MEASUREMENTS_PERIOD + MEASUREMENTS_PERIOD, ) from ..util import timestamps
@@ -28,9 +28,9 @@ def valid_after_from_network_statuses(network_statuses): returns datetime: """ for ns in network_statuses: - document = getattr(ns, 'document', None) + document = getattr(ns, "document", None) if document: - valid_after = getattr(document, 'valid_after', None) + valid_after = getattr(document, "valid_after", None) if valid_after: return valid_after return datetime.utcnow().replace(microsecond=0) @@ -38,7 +38,7 @@ def valid_after_from_network_statuses(network_statuses):
class Relay: def __init__(self, fp, cont, ns=None, desc=None, timestamp=None): - ''' + """ Given a relay fingerprint, fetch all the information about a relay that sbws currently needs and store it in this class. Acts as an abstraction to hide the confusion that is Tor consensus/descriptor stuff. @@ -48,7 +48,7 @@ class Relay:
:param datatime timestamp: the timestamp of a consensus (RouterStatusEntryV3) from which this relay has been obtained. - ''' + """ assert isinstance(fp, str) assert len(fp) == 40 if ns is not None: @@ -99,31 +99,31 @@ class Relay:
@property def nickname(self): - return self._from_ns('nickname') + return self._from_ns("nickname")
@property def fingerprint(self): - return self._from_ns('fingerprint') + return self._from_ns("fingerprint")
@property def flags(self): - return self._from_ns('flags') + return self._from_ns("flags")
@property def exit_policy(self): - return self._from_desc('exit_policy') + return self._from_desc("exit_policy")
@property def average_bandwidth(self): - return self._from_desc('average_bandwidth') + return self._from_desc("average_bandwidth")
@property def burst_bandwidth(self): - return self._from_desc('burst_bandwidth') + return self._from_desc("burst_bandwidth")
@property def observed_bandwidth(self): - return self._from_desc('observed_bandwidth') + return self._from_desc("observed_bandwidth")
@property def consensus_bandwidth(self): @@ -131,19 +131,19 @@ class Relay:
Consensus bandwidth is the only bandwidth value that is in kilobytes. """ - if self._from_ns('bandwidth') is not None: - return self._from_ns('bandwidth') * 1000 + if self._from_ns("bandwidth") is not None: + return self._from_ns("bandwidth") * 1000
@property def consensus_bandwidth_is_unmeasured(self): # measured appears only votes, unmeasured appears in consensus # therefore is_unmeasured is needed to know whether the bandwidth # value in consensus is comming from bwauth measurements or not. - return self._from_ns('is_unmeasured') + return self._from_ns("is_unmeasured")
@property def address(self): - return self._from_ns('address') + return self._from_ns("address")
@property def master_key_ed25519(self): @@ -155,19 +155,19 @@ class Relay: """ # Even if this key is called master-key-ed25519 in dir-spec.txt, # it seems that stem parses it as ed25519_master_key - key = self._from_desc('ed25519_master_key') + key = self._from_desc("ed25519_master_key") if key is None: return None - return key.rstrip('=') + return key.rstrip("=")
@property def consensus_valid_after(self): """Obtain the consensus Valid-After from the document of this relay network status. """ - network_status_document = self._from_ns('document') + network_status_document = self._from_ns("document") if network_status_document: - return getattr(network_status_document, 'valid_after', None) + return getattr(network_status_document, "valid_after", None) return None
@property @@ -222,18 +222,19 @@ class Relay: # When ``strict`` is true, We could increase the chances that # the exit can exit via IPv6 too (``exit_policy_v6``). However, # in theory that is only known using microdescriptors. - return ( - self.exit_policy.strip_private() - .can_exit_to(port=port, strict=strict) + return self.exit_policy.strip_private().can_exit_to( + port=port, strict=strict ) except TypeError: return False return False
def is_exit_not_bad_allowing_port(self, port, strict=False): - return (Flag.BADEXIT not in self.flags and - Flag.EXIT in self.flags and - self.can_exit_to_port(port, strict)) + return ( + Flag.BADEXIT not in self.flags + and Flag.EXIT in self.flags + and self.can_exit_to_port(port, strict) + )
def increment_relay_recent_measurement_attempt(self): """ @@ -276,13 +277,19 @@ class Relay:
class RelayList: - ''' Keeps a list of all relays in the current Tor network and updates it + """Keeps a list of all relays in the current Tor network and updates it transparently in the background. Provides useful interfaces for getting only relays of a certain type. - ''' + """
- def __init__(self, args, conf, controller, - measurements_period=MEASUREMENTS_PERIOD, state=None): + def __init__( + self, + args, + conf, + controller, + measurements_period=MEASUREMENTS_PERIOD, + state=None, + ): self._controller = controller self.rng = random.SystemRandom() self._refresh_lock = Lock() @@ -297,8 +304,10 @@ class RelayList: # The period of time for which the measurements are keep. self._measurements_period = measurements_period self._recent_measurement_attempt = timestamps.DateTimeSeq( - [], MAX_RECENT_PRIORITY_RELAY_COUNT, state, - "recent_measurement_attempt" + [], + MAX_RECENT_PRIORITY_RELAY_COUNT, + state, + "recent_measurement_attempt", ) # Start with 0 for the min bw for our second hops self._exit_min_bw = 0 @@ -307,8 +316,9 @@ class RelayList:
def _need_refresh(self): # New consensuses happen every hour. - return datetime.utcnow() >= \ - self.last_consensus_timestamp + timedelta(seconds=60*60) + return datetime.utcnow() >= self.last_consensus_timestamp + timedelta( + seconds=60 * 60 + )
@property def last_consensus_timestamp(self): @@ -320,23 +330,29 @@ class RelayList: # See if we can get the list of relays without having to do a refresh, # which is expensive and blocks other threads if self._need_refresh(): - log.debug('We need to refresh our list of relays. ' - 'Going to wait for lock.') + log.debug( + "We need to refresh our list of relays. " + "Going to wait for lock." + ) # Whelp we couldn't just get the list of relays because the list is # stale. Wait for the lock so we can refresh it. with self._refresh_lock: - log.debug('We got the lock. Now to see if we still ' - 'need to refresh.') + log.debug( + "We got the lock. Now to see if we still " + "need to refresh." + ) # Now we have the lock ... but wait! Maybe someone else already # did the refreshing. So check if it still needs refreshing. If # not, we can do nothing. if self._need_refresh(): - log.debug('Yup we need to refresh our relays. Doing so.') + log.debug("Yup we need to refresh our relays. Doing so.") self._refresh() else: - log.debug('No we don't need to refresh our relays. ' - 'It was done by someone else.') - log.debug('Giving back the lock for refreshing relays.') + log.debug( + "No we don't need to refresh our relays. " + "It was done by someone else." + ) + log.debug("Giving back the lock for refreshing relays.") return self._relays
@property @@ -390,8 +406,10 @@ class RelayList: # Change to stem.descriptor.remote in future refactor. network_statuses = c.get_network_statuses() new_relays_dict = dict([(r.fingerprint, r) for r in network_statuses]) - log.debug("Number of relays in the current consensus: %d.", - len(new_relays_dict)) + log.debug( + "Number of relays in the current consensus: %d.", + len(new_relays_dict), + )
# Find the timestamp of the last consensus. timestamp = valid_after_from_network_statuses(network_statuses) @@ -438,22 +456,30 @@ class RelayList: new_relays.append(r)
days = self._measurements_period / (60 * 60 * 24) - log.debug("Previous number of relays being measured %d", - len(self._relays)) - log.debug("Number of relays not in the in the consensus in the last " - "%d days: %d.", - days, num_old_relays) - log.debug("Number of relays to measure with the current consensus: " - "%d", len(new_relays)) + log.debug( + "Previous number of relays being measured %d", len(self._relays) + ) + log.debug( + "Number of relays not in the in the consensus in the last " + "%d days: %d.", + days, + num_old_relays, + ) + log.debug( + "Number of relays to measure with the current consensus: " "%d", + len(new_relays), + ) return new_relays
def _refresh(self): # Set a new list of relays. self._relays = self._init_relays()
- log.info("Number of consensuses obtained in the last %s days: %s.", - int(self._measurements_period / 24 / 60 / 60), - self.recent_consensus_count) + log.info( + "Number of consensuses obtained in the last %s days: %s.", + int(self._measurements_period / 24 / 60 / 60), + self.recent_consensus_count, + )
# Calculate minimum bandwidth value for 2nd hop after we refreshed # our available relays. @@ -465,8 +491,11 @@ class RelayList: return len(self._recent_consensus)
def exits_not_bad_allowing_port(self, port, strict=False): - return [r for r in self.exits - if r.is_exit_not_bad_allowing_port(port, strict)] + return [ + r + for r in self.exits + if r.is_exit_not_bad_allowing_port(port, strict) + ]
def increment_recent_measurement_attempt(self): """ @@ -493,21 +522,24 @@ class RelayList: # Sort our sets of candidates according to bw, lowest amount first. # It's okay to keep things simple for the calculation and go over all # exits, including badexits. - exit_candidates = sorted(self.exits, - key=lambda r: r.consensus_bandwidth) - non_exit_candidates = sorted(self.non_exits, - key=lambda r: r.consensus_bandwidth) + exit_candidates = sorted( + self.exits, key=lambda r: r.consensus_bandwidth + ) + non_exit_candidates = sorted( + self.non_exits, key=lambda r: r.consensus_bandwidth + ) # We know the bandwidth is sorted from least to most. Dividing the # length of the available relays by 4 gives us the position of the # relay with the lowest bandwidth from the top 75%. We do this both # for our exit and non-exit candidates. - pos = int(len(exit_candidates)/4) + pos = int(len(exit_candidates) / 4) self._exit_min_bw = exit_candidates[pos].consensus_bandwidth - pos = int(len(non_exit_candidates)/4) + pos = int(len(non_exit_candidates) / 4) # when there are not non-exits in a test network if pos: - self._non_exit_min_bw = \ - non_exit_candidates[pos].consensus_bandwidth + self._non_exit_min_bw = non_exit_candidates[ + pos + ].consensus_bandwidth
def exit_min_bw(self): return self._exit_min_bw diff --git a/sbws/lib/relayprioritizer.py b/sbws/lib/relayprioritizer.py index 2d6a488..e734e03 100644 --- a/sbws/lib/relayprioritizer.py +++ b/sbws/lib/relayprioritizer.py @@ -16,21 +16,27 @@ class RelayPrioritizer: def __init__(self, args, conf, relay_list, result_dump): assert isinstance(relay_list, RelayList) assert isinstance(result_dump, ResultDump) - self.fresh_seconds = conf.getint('general', 'data_period')*24*60*60 + self.fresh_seconds = ( + conf.getint("general", "data_period") * 24 * 60 * 60 + ) self.relay_list = relay_list self.result_dump = result_dump self.measure_authorities = conf.getboolean( - 'relayprioritizer', 'measure_authorities') - self.min_to_return = conf.getint('relayprioritizer', 'min_relays') + "relayprioritizer", "measure_authorities" + ) + self.min_to_return = conf.getint("relayprioritizer", "min_relays") self.fraction_to_return = conf.getfloat( - 'relayprioritizer', 'fraction_relays') - self._state = state.State(conf.getpath('paths', 'state_fname')) + "relayprioritizer", "fraction_relays" + ) + self._state = state.State(conf.getpath("paths", "state_fname")) self._recent_priority_list = timestamps.DateTimeSeq( [], 120, self._state, "recent_priority_list" ) self._recent_priority_relay = timestamps.DateTimeIntSeq( - [], MAX_RECENT_PRIORITY_RELAY_COUNT, self._state, - "recent_priority_relay" + [], + MAX_RECENT_PRIORITY_RELAY_COUNT, + self._state, + "recent_priority_relay", )
def increment_recent_priority_list(self): @@ -59,8 +65,9 @@ class RelayPrioritizer: def recent_priority_relay_count(self): return len(self._recent_priority_relay)
- def best_priority(self, prioritize_result_error=False, - return_fraction=True): + def best_priority( + self, prioritize_result_error=False, return_fraction=True + ): """Yields a new ordered list of relays to be measured next.
The relays that were measured farther away in the past, @@ -122,8 +129,10 @@ class RelayPrioritizer: # Calculate freshness as the remaining time until this result # is no longer valid freshness = result.time - oldest_allowed - if isinstance(result, ResultError) \ - and prioritize_result_error is True: + if ( + isinstance(result, ResultError) + and prioritize_result_error is True + ): # log.debug('Cutting freshness for a %s result by %d%% for' # ' %s', result.type.value, # result.freshness_reduction_factor * 100, @@ -133,7 +142,9 @@ class RelayPrioritizer: # depending on the type of error. # In a future refactor, create these values on an algorithm # or create constants. - freshness *= max(1.0-result.freshness_reduction_factor, 0) + freshness *= max( + 1.0 - result.freshness_reduction_factor, 0 + ) priority += freshness # In a future refactor, do not create a new attribute relay.priority = priority @@ -143,12 +154,13 @@ class RelayPrioritizer:
fn_tstop = Decimal(time.time()) fn_tdelta = (fn_tstop - fn_tstart) * 1000 - log.info('Spent %f msecs calculating relay best priority', fn_tdelta) + log.info("Spent %f msecs calculating relay best priority", fn_tdelta)
# Return a fraction of relays in the network if return_fraction is # True, otherwise return all. - cutoff = max(int(len(relays) * self.fraction_to_return), - self.min_to_return) + cutoff = max( + int(len(relays) * self.fraction_to_return), self.min_to_return + ) upper_limit = cutoff if return_fraction else len(relays) # NOTE: these two are blocking, write to disk # Increment the number of times ``best_priority`` has been run. @@ -158,11 +170,14 @@ class RelayPrioritizer: # than the number of relays in the network, use the length of the list. self.increment_recent_priority_relay(len(relays[0:upper_limit])) for relay in relays[0:upper_limit]: - log.debug('Returning next relay %s with priority %f', - relay.nickname, relay.priority) + log.debug( + "Returning next relay %s with priority %f", + relay.nickname, + relay.priority, + ) # In a future refactor, a new attribute should not be created, # then no need to remove it. - del(relay.priority) + del relay.priority # Increment the number of times a realy was "prioritized" to be # measured. relay.increment_relay_recent_priority_list() diff --git a/sbws/lib/resultdump.py b/sbws/lib/resultdump.py index cc61bcc..58543b2 100644 --- a/sbws/lib/resultdump.py +++ b/sbws/lib/resultdump.py @@ -20,11 +20,11 @@ log = logging.getLogger(__name__)
def merge_result_dicts(d1, d2): - ''' + """ Given two dictionaries that contain Result data, merge them. Result dictionaries have keys of relay fingerprints and values of lists of results for those relays. - ''' + """ for key in d2: if key not in d1: d1[key] = [] @@ -33,16 +33,16 @@ def merge_result_dicts(d1, d2):
def load_result_file(fname, success_only=False): - ''' Reads in all lines from the given file, and parses them into Result + """Reads in all lines from the given file, and parses them into Result structures (or subclasses of Result). Optionally only keeps ResultSuccess. Returns all kept Results as a result dictionary. This function does not - care about the age of the results ''' + care about the age of the results""" assert os.path.isfile(fname) d = {} num_total = 0 num_ignored = 0 with DirectoryLock(os.path.dirname(fname)): - with open(fname, 'rt') as fd: + with open(fname, "rt") as fd: for line in fd: num_total += 1 try: @@ -50,7 +50,7 @@ def load_result_file(fname, success_only=False): json.loads(line.strip(), cls=CustomDecoder) ) except json.decoder.JSONDecodeError: - log.warning('Could not decode result %s', line.strip()) + log.warning("Could not decode result %s", line.strip()) r = None if r is None: num_ignored += 1 @@ -62,19 +62,22 @@ def load_result_file(fname, success_only=False): d[fp] = [] d[fp].append(r) num_kept = sum([len(d[fp]) for fp in d]) - log.debug('Keeping %d/%d read lines from %s', num_kept, num_total, fname) + log.debug("Keeping %d/%d read lines from %s", num_kept, num_total, fname) if num_ignored > 0: - log.warning('Had to ignore %d results due to not knowing how to ' - 'parse them.', num_ignored) + log.warning( + "Had to ignore %d results due to not knowing how to " + "parse them.", + num_ignored, + ) return d
def trim_results(fresh_days, result_dict): - ''' Given a result dictionary, remove all Results that are no longer valid - and return the new dictionary ''' + """Given a result dictionary, remove all Results that are no longer valid + and return the new dictionary""" assert isinstance(fresh_days, int) assert isinstance(result_dict, dict) - data_period = fresh_days * 24*60*60 + data_period = fresh_days * 24 * 60 * 60 oldest_allowed = time.time() - data_period out_results = {} for fp in result_dict: @@ -85,12 +88,13 @@ def trim_results(fresh_days, result_dict): out_results[fp].append(result) num_in = sum([len(result_dict[fp]) for fp in result_dict]) num_out = sum([len(out_results[fp]) for fp in out_results]) - log.debug('Keeping %d/%d results after removing old ones', num_out, num_in) + log.debug("Keeping %d/%d results after removing old ones", num_out, num_in) return out_results
-def trim_results_ip_changed(result_dict, on_changed_ipv4=False, - on_changed_ipv6=False): +def trim_results_ip_changed( + result_dict, on_changed_ipv4=False, on_changed_ipv6=False +): """When there are results for the same relay with different IPs, create a new results' dictionary without that relay's results using an older IP. @@ -116,24 +120,33 @@ def trim_results_ip_changed(result_dict, on_changed_ipv4=False, # a relay that change address ordered_results = sorted(results, key=lambda r: r.time) latest_address = ordered_results[-1].address - last_ip_results = [result for result in results - if result.address == latest_address] + last_ip_results = [ + result + for result in results + if result.address == latest_address + ] new_results_dict[fp] = last_ip_results else: new_results_dict[fp] = results return new_results_dict if on_changed_ipv6 is True: - log.warning("Reseting bandwidth results when IPv6 changes," - " is not yet implemented.") + log.warning( + "Reseting bandwidth results when IPv6 changes," + " is not yet implemented." + ) return result_dict
-def load_recent_results_in_datadir(fresh_days, datadir, success_only=False, - on_changed_ipv4=False, - on_changed_ipv6=False): - ''' Given a data directory, read all results files in it that could have +def load_recent_results_in_datadir( + fresh_days, + datadir, + success_only=False, + on_changed_ipv4=False, + on_changed_ipv6=False, +): + """Given a data directory, read all results files in it that could have results in them that are still valid. Trim them, and return the valid - Results as a list ''' + Results as a list""" assert isinstance(fresh_days, int) assert os.path.isdir(datadir) # Inform the results are being loaded, since it takes some seconds. @@ -148,41 +161,47 @@ def load_recent_results_in_datadir(fresh_days, datadir, success_only=False, # So instead settle on finding files in the datadir and one # subdirectory below the datadir that fit the form of YYYY-MM-DD*.txt d = working_day.date() - patterns = [os.path.join(datadir, '{}*.txt'.format(d)), - os.path.join(datadir, '*', '{}*.txt'.format(d))] + patterns = [ + os.path.join(datadir, "{}*.txt".format(d)), + os.path.join(datadir, "*", "{}*.txt".format(d)), + ] for pattern in patterns: for fname in glob(pattern): new_results = load_result_file( - fname, success_only=success_only) + fname, success_only=success_only + ) results = merge_result_dicts(results, new_results) working_day += timedelta(days=1) results = trim_results(fresh_days, results) # in time fresh days is possible that a relay changed ip, # if that's the case, keep only the results for the last ip - results = trim_results_ip_changed(results, on_changed_ipv4, - on_changed_ipv6) + results = trim_results_ip_changed( + results, on_changed_ipv4, on_changed_ipv6 + ) num_res = sum([len(results[fp]) for fp in results]) if num_res == 0: - log.warning('Results files that are valid not found. ' - 'Probably sbws scanner was not run first or ' - 'it ran more than %d days ago or ' - 'it was using a different datadir than %s.', data_period, - datadir) + log.warning( + "Results files that are valid not found. " + "Probably sbws scanner was not run first or " + "it ran more than %d days ago or " + "it was using a different datadir than %s.", + data_period, + datadir, + ) return results
def write_result_to_datadir(result, datadir): - ''' Can be called from any thread ''' + """ Can be called from any thread """ assert isinstance(result, Result) assert os.path.isdir(datadir) dt = datetime.utcfromtimestamp(result.time) - ext = '.txt' - result_fname = os.path.join( - datadir, '{}{}'.format(dt.date(), ext)) + ext = ".txt" + result_fname = os.path.join(datadir, "{}{}".format(dt.date(), ext)) with DirectoryLock(datadir): - log.debug('Writing a result to %s', result_fname) - with open(result_fname, 'at') as fd: - fd.write('{}\n'.format(str(result))) + log.debug("Writing a result to %s", result_fname) + with open(result_fname, "at") as fd: + fd.write("{}\n".format(str(result)))
class _StrEnum(str, Enum): @@ -190,17 +209,17 @@ class _StrEnum(str, Enum):
class _ResultType(_StrEnum): - Success = 'success' - Error = 'error-misc' - ErrorCircuit = 'error-circ' - ErrorStream = 'error-stream' - ErrorAuth = 'error-auth' + Success = "success" + Error = "error-misc" + ErrorCircuit = "error-circ" + ErrorStream = "error-stream" + ErrorAuth = "error-auth" # When it can not be found a second relay suitable to measure a relay. # It is used in ``ResultErrorSecondRelay``. - ErrorSecondRelay = 'error-second-relay' + ErrorSecondRelay = "error-second-relay" # When there is not a working destination Web Server. # It is used in ``ResultErrorDestionation``. - ErrorDestination = 'error-destination' + ErrorDestination = "error-destination"
class Result: @@ -219,15 +238,24 @@ class Result: measurements and a measurement has a relay, instead of every measurement re-implementing ``Relay``. """ - def __init__(self, fingerprint, nickname, address, master_key_ed25519, - average_bandwidth=None, burst_bandwidth=None, - observed_bandwidth=None, consensus_bandwidth=None, - consensus_bandwidth_is_unmeasured=None, - # Counters to be stored by relay and not per measurement, - # since the measurements might fail. - relay_in_recent_consensus=None, - relay_recent_measurement_attempt=None, - relay_recent_priority_list=None): + + def __init__( + self, + fingerprint, + nickname, + address, + master_key_ed25519, + average_bandwidth=None, + burst_bandwidth=None, + observed_bandwidth=None, + consensus_bandwidth=None, + consensus_bandwidth_is_unmeasured=None, + # Counters to be stored by relay and not per measurement, + # since the measurements might fail. + relay_in_recent_consensus=None, + relay_recent_measurement_attempt=None, + relay_recent_priority_list=None, + ): """ Initializes a ``Result.Relay``.
@@ -245,22 +273,24 @@ class Result: self.burst_bandwidth = burst_bandwidth self.observed_bandwidth = observed_bandwidth self.consensus_bandwidth = consensus_bandwidth - self.consensus_bandwidth_is_unmeasured = \ + self.consensus_bandwidth_is_unmeasured = ( consensus_bandwidth_is_unmeasured - self.relay_in_recent_consensus = \ - relay_in_recent_consensus - self.relay_recent_measurement_attempt = \ + ) + self.relay_in_recent_consensus = relay_in_recent_consensus + self.relay_recent_measurement_attempt = ( relay_recent_measurement_attempt - self.relay_recent_priority_list = \ - relay_recent_priority_list + ) + self.relay_recent_priority_list = relay_recent_priority_list
def __init__(self, relay, circ, dest_url, scanner_nick, t=None): """ Initilizes the measurement and the relay with all the relay attributes. """ self._relay = Result.Relay( - relay.fingerprint, relay.nickname, - relay.address, relay.master_key_ed25519, + relay.fingerprint, + relay.nickname, + relay.address, + relay.master_key_ed25519, relay.average_bandwidth, relay.burst_bandwidth, relay.observed_bandwidth, @@ -268,8 +298,8 @@ class Result: relay.consensus_bandwidth_is_unmeasured, relay.relay_in_recent_consensus, relay.relay_recent_measurement_attempt, - relay.relay_recent_priority_list - ) + relay.relay_recent_priority_list, + ) self._circ = circ self._dest_url = dest_url self._scanner = scanner_nick @@ -360,22 +390,19 @@ class Result:
def to_dict(self): return { - 'fingerprint': self.fingerprint, - 'nickname': self.nickname, - 'address': self.address, - 'master_key_ed25519': self.master_key_ed25519, - 'circ': self.circ, - 'dest_url': self.dest_url, - 'time': self.time, - 'type': self.type, - 'scanner': self.scanner, - 'version': self.version, - 'relay_in_recent_consensus': - self.relay_in_recent_consensus, - 'relay_recent_measurement_attempt': - self.relay_recent_measurement_attempt, - 'relay_recent_priority_list': - self.relay_recent_priority_list, + "fingerprint": self.fingerprint, + "nickname": self.nickname, + "address": self.address, + "master_key_ed25519": self.master_key_ed25519, + "circ": self.circ, + "dest_url": self.dest_url, + "time": self.time, + "type": self.type, + "scanner": self.scanner, + "version": self.version, + "relay_in_recent_consensus": self.relay_in_recent_consensus, + "relay_recent_measurement_attempt": self.relay_recent_measurement_attempt, + "relay_recent_priority_list": self.relay_recent_priority_list, }
@staticmethod @@ -395,27 +422,28 @@ class Result:
``version`` is not being used and should be removed. """ - assert 'version' in d - if d['version'] != RESULT_VERSION: + assert "version" in d + if d["version"] != RESULT_VERSION: return None - assert 'type' in d - if d['type'] == _ResultType.Success.value: + assert "type" in d + if d["type"] == _ResultType.Success.value: return ResultSuccess.from_dict(d) - elif d['type'] == _ResultType.Error.value: + elif d["type"] == _ResultType.Error.value: return ResultError.from_dict(d) - elif d['type'] == _ResultType.ErrorCircuit.value: + elif d["type"] == _ResultType.ErrorCircuit.value: return ResultErrorCircuit.from_dict(d) - elif d['type'] == _ResultType.ErrorStream.value: + elif d["type"] == _ResultType.ErrorStream.value: return ResultErrorStream.from_dict(d) - elif d['type'] == _ResultType.ErrorAuth.value: + elif d["type"] == _ResultType.ErrorAuth.value: return ResultErrorAuth.from_dict(d) - elif d['type'] == _ResultType.ErrorSecondRelay.value: + elif d["type"] == _ResultType.ErrorSecondRelay.value: return ResultErrorSecondRelay.from_dict(d) - elif d['type'] == _ResultType.ErrorDestination.value: + elif d["type"] == _ResultType.ErrorDestination.value: return ResultErrorDestination.from_dict(d) else: raise NotImplementedError( - 'Unknown result type {}'.format(d['type'])) + "Unknown result type {}".format(d["type"]) + )
def __str__(self): return json.dumps(self.to_dict(), cls=CustomEncoder) @@ -432,7 +460,7 @@ class ResultError(Result):
@property def freshness_reduction_factor(self): - ''' + """ When the RelayPrioritizer encounters this Result, how much should it adjust its freshness? (See RelayPrioritizer.best_priority() for more information about "freshness") @@ -444,7 +472,7 @@ class ResultError(Result): The value 0.5 was chosen somewhat arbitrarily, but a few weeks of live network testing verifies that sbws is still able to perform useful measurements in a reasonable amount of time. - ''' + """ return 0.5
@property @@ -456,23 +484,34 @@ class ResultError(Result): assert isinstance(d, dict) return ResultError( Result.Relay( - d['fingerprint'], d['nickname'], d['address'], - d['master_key_ed25519'], - relay_in_recent_consensus= # noqa - d.get('relay_in_recent_consensus', None), # noqa - relay_recent_measurement_attempt= # noqa - d.get('relay_recent_measurement_attempt', None), # noqa - relay_recent_priority_list= # noqa - d.get('relay_recent_priority_list', None), # noqa - ), - d['circ'], d['dest_url'], d['scanner'], - msg=d['msg'], t=d['time']) + d["fingerprint"], + d["nickname"], + d["address"], + d["master_key_ed25519"], + relay_in_recent_consensus=d.get( # noqa + "relay_in_recent_consensus", None + ), # noqa + relay_recent_measurement_attempt=d.get( # noqa + "relay_recent_measurement_attempt", None + ), # noqa + relay_recent_priority_list=d.get( # noqa + "relay_recent_priority_list", None + ), # noqa + ), + d["circ"], + d["dest_url"], + d["scanner"], + msg=d["msg"], + t=d["time"], + )
def to_dict(self): d = super().to_dict() - d.update({ - 'msg': self.msg, - }) + d.update( + { + "msg": self.msg, + } + ) return d
@@ -486,7 +525,7 @@ class ResultErrorCircuit(ResultError):
@property def freshness_reduction_factor(self): - ''' + """ There are a few instances when it isn't the relay's fault that the circuit failed to get built. Maybe someday we'll try detecting whose fault it most likely was and subclassing ResultErrorCircuit. But for @@ -496,7 +535,7 @@ class ResultErrorCircuit(ResultError): A (hopefully very very rare) example of when a circuit would fail to get built is when the sbws client machine suddenly loses Internet access. - ''' + """ return 0.6
@staticmethod @@ -504,17 +543,26 @@ class ResultErrorCircuit(ResultError): assert isinstance(d, dict) return ResultErrorCircuit( Result.Relay( - d['fingerprint'], d['nickname'], d['address'], - d['master_key_ed25519'], - relay_in_recent_consensus= # noqa - d.get('relay_in_recent_consensus', None), # noqa - relay_recent_measurement_attempt= # noqa - d.get('relay_recent_measurement_attempt', None), # noqa - relay_recent_priority_list= # noqa - d.get('relay_recent_priority_list', None), # noqa - ), - d['circ'], d['dest_url'], d['scanner'], - msg=d['msg'], t=d['time']) + d["fingerprint"], + d["nickname"], + d["address"], + d["master_key_ed25519"], + relay_in_recent_consensus=d.get( # noqa + "relay_in_recent_consensus", None + ), # noqa + relay_recent_measurement_attempt=d.get( # noqa + "relay_recent_measurement_attempt", None + ), # noqa + relay_recent_priority_list=d.get( # noqa + "relay_recent_priority_list", None + ), # noqa + ), + d["circ"], + d["dest_url"], + d["scanner"], + msg=d["msg"], + t=d["time"], + )
def to_dict(self): d = super().to_dict() @@ -534,17 +582,26 @@ class ResultErrorStream(ResultError): assert isinstance(d, dict) return ResultErrorStream( Result.Relay( - d['fingerprint'], d['nickname'], d['address'], - d['master_key_ed25519'], - relay_in_recent_consensus= # noqa - d.get('relay_in_recent_consensus', None), # noqa - relay_recent_measurement_attempt= # noqa - d.get('relay_recent_measurement_attempt', None), # noqa - relay_recent_priority_list= # noqa - d.get('relay_recent_priority_list', None), # noqa - ), - d['circ'], d['dest_url'], d['scanner'], - msg=d['msg'], t=d['time']) + d["fingerprint"], + d["nickname"], + d["address"], + d["master_key_ed25519"], + relay_in_recent_consensus=d.get( # noqa + "relay_in_recent_consensus", None + ), # noqa + relay_recent_measurement_attempt=d.get( # noqa + "relay_recent_measurement_attempt", None + ), # noqa + relay_recent_priority_list=d.get( # noqa + "relay_recent_priority_list", None + ), # noqa + ), + d["circ"], + d["dest_url"], + d["scanner"], + msg=d["msg"], + t=d["time"], + )
def to_dict(self): d = super().to_dict() @@ -571,6 +628,7 @@ class ResultErrorSecondRelay(ResultError): In a future refactor, there should be only one ``ResultError`` class and assign the type in the ``scanner`` module. """ + def __init__(self, *a, **kw): super().__init__(*a, **kw)
@@ -583,17 +641,26 @@ class ResultErrorSecondRelay(ResultError): assert isinstance(d, dict) return ResultErrorSecondRelay( Result.Relay( - d['fingerprint'], d['nickname'], d['address'], - d['master_key_ed25519'], - relay_in_recent_consensus= # noqa - d.get('relay_in_recent_consensus', None), # noqa - relay_recent_measurement_attempt= # noqa - d.get('relay_recent_measurement_attempt', None), # noqa - relay_recent_priority_list= # noqa - d.get('relay_recent_priority_list', None), # noqa - ), - d['circ'], d['dest_url'], d['scanner'], - msg=d['msg'], t=d['time']) + d["fingerprint"], + d["nickname"], + d["address"], + d["master_key_ed25519"], + relay_in_recent_consensus=d.get( # noqa + "relay_in_recent_consensus", None + ), # noqa + relay_recent_measurement_attempt=d.get( # noqa + "relay_recent_measurement_attempt", None + ), # noqa + relay_recent_priority_list=d.get( # noqa + "relay_recent_priority_list", None + ), # noqa + ), + d["circ"], + d["dest_url"], + d["scanner"], + msg=d["msg"], + t=d["time"], + )
def to_dict(self): d = super().to_dict() @@ -613,6 +680,7 @@ class ResultErrorDestination(ResultError): In a future refactor, there should be only one ``ResultError`` class and assign the type in the ``scanner`` module. """ + def __init__(self, *a, **kw): super().__init__(*a, **kw)
@@ -625,17 +693,26 @@ class ResultErrorDestination(ResultError): assert isinstance(d, dict) return ResultErrorSecondRelay( Result.Relay( - d['fingerprint'], d['nickname'], d['address'], - d['master_key_ed25519'], - d['circ'], d['dest_url'], d['scanner'], - relay_in_recent_consensus= # noqa - d.get('relay_in_recent_consensus', None), # noqa - relay_recent_measurement_attempt= # noqa - d.get('relay_recent_measurement_attempt', None), # noqa - relay_recent_priority_list= # noqa - d.get('relay_recent_priority_list', None), # noqa - ), - msg=d['msg'], t=d['time']) + d["fingerprint"], + d["nickname"], + d["address"], + d["master_key_ed25519"], + d["circ"], + d["dest_url"], + d["scanner"], + relay_in_recent_consensus=d.get( # noqa + "relay_in_recent_consensus", None + ), # noqa + relay_recent_measurement_attempt=d.get( # noqa + "relay_recent_measurement_attempt", None + ), # noqa + relay_recent_priority_list=d.get( # noqa + "relay_recent_priority_list", None + ), # noqa + ), + msg=d["msg"], + t=d["time"], + )
def to_dict(self): d = super().to_dict() @@ -652,7 +729,7 @@ class ResultErrorAuth(ResultError):
@property def freshness_reduction_factor(self): - ''' + """ Override the default ResultError.freshness_reduction_factor because a ResultErrorAuth is most likely not the measured relay's fault, so we shouldn't hurt its priority as much. A higher reduction factor means a @@ -660,7 +737,7 @@ class ResultErrorAuth(ResultError): priority better.
The value 0.9 was chosen somewhat arbitrarily. - ''' + """ return 0.9
@staticmethod @@ -668,17 +745,26 @@ class ResultErrorAuth(ResultError): assert isinstance(d, dict) return ResultErrorAuth( Result.Relay( - d['fingerprint'], d['nickname'], d['address'], - d['master_key_ed25519'], - relay_in_recent_consensus= # noqa - d.get('relay_in_recent_consensus', None), # noqa - relay_recent_measurement_attempt= # noqa - d.get('relay_recent_measurement_attempt', None), # noqa - relay_recent_priority_list= # noqa - d.get('relay_recent_priority_list', None), # noqa - ), - d['circ'], d['dest_url'], d['scanner'], - msg=d['msg'], t=d['time']) + d["fingerprint"], + d["nickname"], + d["address"], + d["master_key_ed25519"], + relay_in_recent_consensus=d.get( # noqa + "relay_in_recent_consensus", None + ), # noqa + relay_recent_measurement_attempt=d.get( # noqa + "relay_recent_measurement_attempt", None + ), # noqa + relay_recent_priority_list=d.get( # noqa + "relay_recent_priority_list", None + ), # noqa + ), + d["circ"], + d["dest_url"], + d["scanner"], + msg=d["msg"], + t=d["time"], + )
def to_dict(self): d = super().to_dict() @@ -707,46 +793,59 @@ class ResultSuccess(Result): def from_dict(d): assert isinstance(d, dict) return ResultSuccess( - d['rtts'] or [], d['downloads'], + d["rtts"] or [], + d["downloads"], Result.Relay( - d['fingerprint'], d['nickname'], d['address'], - d['master_key_ed25519'], d['relay_average_bandwidth'], - d.get('relay_burst_bandwidth'), d['relay_observed_bandwidth'], - d.get('consensus_bandwidth'), - d.get('consensus_bandwidth_is_unmeasured'), - relay_in_recent_consensus= # noqa - d.get('relay_in_recent_consensus', None), # noqa - relay_recent_measurement_attempt= # noqa - d.get('relay_recent_measurement_attempt', None), # noqa - relay_recent_priority_list= # noqa - d.get('relay_recent_priority_list', None), # noqa - ), - d['circ'], d['dest_url'], d['scanner'], - t=d['time']) + d["fingerprint"], + d["nickname"], + d["address"], + d["master_key_ed25519"], + d["relay_average_bandwidth"], + d.get("relay_burst_bandwidth"), + d["relay_observed_bandwidth"], + d.get("consensus_bandwidth"), + d.get("consensus_bandwidth_is_unmeasured"), + relay_in_recent_consensus=d.get( # noqa + "relay_in_recent_consensus", None + ), # noqa + relay_recent_measurement_attempt=d.get( # noqa + "relay_recent_measurement_attempt", None + ), # noqa + relay_recent_priority_list=d.get( # noqa + "relay_recent_priority_list", None + ), # noqa + ), + d["circ"], + d["dest_url"], + d["scanner"], + t=d["time"], + )
def to_dict(self): d = super().to_dict() - d.update({ - 'rtts': self.rtts, - 'downloads': self.downloads, - 'relay_average_bandwidth': self.relay_average_bandwidth, - 'relay_burst_bandwidth': self.relay_burst_bandwidth, - 'relay_observed_bandwidth': self.relay_observed_bandwidth, - 'consensus_bandwidth': self.consensus_bandwidth, - 'consensus_bandwidth_is_unmeasured': - self.consensus_bandwidth_is_unmeasured, - }) + d.update( + { + "rtts": self.rtts, + "downloads": self.downloads, + "relay_average_bandwidth": self.relay_average_bandwidth, + "relay_burst_bandwidth": self.relay_burst_bandwidth, + "relay_observed_bandwidth": self.relay_observed_bandwidth, + "consensus_bandwidth": self.consensus_bandwidth, + "consensus_bandwidth_is_unmeasured": self.consensus_bandwidth_is_unmeasured, + } + ) return d
class ResultDump: - ''' Runs the enter() method in a new thread and collects new Results on its - queue. Writes them to daily result files in the data directory ''' + """Runs the enter() method in a new thread and collects new Results on its + queue. Writes them to daily result files in the data directory""" + def __init__(self, args, conf): - assert os.path.isdir(conf.getpath('paths', 'datadir')) + assert os.path.isdir(conf.getpath("paths", "datadir")) self.conf = conf - self.fresh_days = conf.getint('general', 'data_period') - self.datadir = conf.getpath('paths', 'datadir') + self.fresh_days = conf.getint("general", "data_period") + self.datadir = conf.getpath("paths", "datadir") self.data = {} self.data_lock = RLock() self.thread = Thread(target=self.enter) @@ -757,7 +856,7 @@ class ResultDump: fail_hard(e)
def store_result(self, result): - ''' Call from ResultDump thread ''' + """ Call from ResultDump thread """ assert isinstance(result, Result) with self.data_lock: fp = result.fingerprint @@ -771,27 +870,42 @@ class ResultDump: # file.
def handle_result(self, result): - ''' Call from ResultDump thread. If we are shutting down, ignores - ResultError* types ''' + """Call from ResultDump thread. If we are shutting down, ignores + ResultError* types""" assert isinstance(result, Result) fp = result.fingerprint nick = result.nickname if isinstance(result, ResultError) and settings.end_event.is_set(): - log.debug('Ignoring %s for %s %s because we are shutting down', - type(result).__name__, nick, fp) + log.debug( + "Ignoring %s for %s %s because we are shutting down", + type(result).__name__, + nick, + fp, + ) return self.store_result(result) write_result_to_datadir(result, self.datadir) if result.type == "success": - msg = "Success measuring {} ({}) via circuit {} and " \ - "destination {}".format( - result.fingerprint, result.nickname, result.circ, - result.dest_url) + msg = ( + "Success measuring {} ({}) via circuit {} and " + "destination {}".format( + result.fingerprint, + result.nickname, + result.circ, + result.dest_url, + ) + ) else: - msg = "Error measuring {} ({}) via circuit {} and " \ - "destination {}: {}".format( - result.fingerprint, result.nickname, result.circ, - result.dest_url, result.msg) + msg = ( + "Error measuring {} ({}) via circuit {} and " + "destination {}: {}".format( + result.fingerprint, + result.nickname, + result.circ, + result.dest_url, + result.msg, + ) + ) # The result doesn't store the exit policies, so it can't be logged # whether it was an exit. if result.circ: @@ -799,8 +913,10 @@ class ResultDump: msg += ". As exit." if as_exit else ". As entry." # When the error is that there are not more functional destinations. if result.type == "error-destination": - log.warning("Shutting down because there are not functional " - "destinations.") + log.warning( + "Shutting down because there are not functional " + "destinations." + ) # NOTE: Because this is executed in a thread, stop_threads can not # be call from here, it has to be call from the main thread. # Instead set the singleton end event, that will call stop_threads @@ -830,7 +946,8 @@ class ResultDump: """ with self.data_lock: self.data = load_recent_results_in_datadir( - self.fresh_days, self.datadir) + self.fresh_days, self.datadir + ) while not (settings.end_event.is_set() and self.queue.empty()): try: event = self.queue.get(timeout=1) @@ -838,7 +955,7 @@ class ResultDump: continue data = event if data is None: - log.debug('Got None in ResultDump') + log.debug("Got None in ResultDump") continue elif isinstance(data, list): for r in data: @@ -847,9 +964,12 @@ class ResultDump: elif isinstance(data, Result): self.handle_result(data) else: - log.warning('The only thing we should ever receive in the ' - 'result thread is a Result or list of Results. ' - 'Ignoring %s', type(data)) + log.warning( + "The only thing we should ever receive in the " + "result thread is a Result or list of Results. " + "Ignoring %s", + type(data), + )
def results_for_relay(self, relay): assert isinstance(relay, Relay) diff --git a/sbws/lib/scaling.py b/sbws/lib/scaling.py index 1af007e..52b2cd2 100644 --- a/sbws/lib/scaling.py +++ b/sbws/lib/scaling.py @@ -6,8 +6,7 @@ from sbws.util.stem import rs_relay_type
def bw_measurements_from_results(results): return [ - dl['amount'] / dl['duration'] - for r in results for dl in r.downloads + dl["amount"] / dl["duration"] for r in results for dl in r.downloads ]
diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py index 88bfc56..bb9b028 100644 --- a/sbws/lib/v3bwfile.py +++ b/sbws/lib/v3bwfile.py @@ -13,23 +13,36 @@ from statistics import median, mean from stem.descriptor import parse_file
from sbws import __version__ -from sbws.globals import (SPEC_VERSION, BW_LINE_SIZE, SBWS_SCALE_CONSTANT, - TORFLOW_SCALING, SBWS_SCALING, TORFLOW_BW_MARGIN, - TORFLOW_OBS_LAST, TORFLOW_OBS_MEAN, - PROP276_ROUND_DIG, MIN_REPORT, MAX_BW_DIFF_PERC) +from sbws.globals import ( + SPEC_VERSION, + BW_LINE_SIZE, + SBWS_SCALE_CONSTANT, + TORFLOW_SCALING, + SBWS_SCALING, + TORFLOW_BW_MARGIN, + TORFLOW_OBS_LAST, + TORFLOW_OBS_MEAN, + PROP276_ROUND_DIG, + MIN_REPORT, + MAX_BW_DIFF_PERC, +) from sbws.lib import scaling from sbws.lib.resultdump import ResultSuccess, _ResultType from sbws.util.filelock import DirectoryLock -from sbws.util.timestamp import (now_isodt_str, unixts_to_isodt_str, - now_unixts, isostr_to_dt_obj, - dt_obj_to_isodt_str) +from sbws.util.timestamp import ( + now_isodt_str, + unixts_to_isodt_str, + now_unixts, + isostr_to_dt_obj, + dt_obj_to_isodt_str, +) from sbws.util.state import State
log = logging.getLogger(__name__)
-LINE_SEP = '\n' -KEYVALUE_SEP_V1 = '=' -KEYVALUE_SEP_V2 = ' ' +LINE_SEP = "\n" +KEYVALUE_SEP_V1 = "=" +KEYVALUE_SEP_V2 = " "
# NOTE: in a future refactor make make all the KeyValues be a dictionary # with their type, so that it's more similar to stem parser. @@ -37,7 +50,7 @@ KEYVALUE_SEP_V2 = ' ' # Header KeyValues # ================= # KeyValues that need to be in a specific order in the Bandwidth File. -HEADER_KEYS_V1_1_ORDERED = ['version'] +HEADER_KEYS_V1_1_ORDERED = ["version"] # KeyValues that are not initialized from the state file nor the measurements. # They can also be pass as an argument to `Header` to overwrite default values, # what is done in unit tests. @@ -83,19 +96,19 @@ HEADER_RECENT_MEASUREMENTS_EXCLUDED_KEYS = [ # Number of relays that were measured but all the measurements failed # because of network failures or it was # not found a suitable helper relay - 'recent_measurements_excluded_error_count', + "recent_measurements_excluded_error_count", # Number of relays that have successful measurements but the measurements # were not away from each other in X time (by default 1 day). - 'recent_measurements_excluded_near_count', + "recent_measurements_excluded_near_count", # Number of relays that have successful measurements and they are away from # each other but they are not X time recent. # By default this is 5 days, which is the same time the older # the measurements can be by default. - 'recent_measurements_excluded_old_count', + "recent_measurements_excluded_old_count", # Number of relays that have successful measurements and they are away from # each other and recent # but the number of measurements are less than X (by default 2). - 'recent_measurements_excluded_few_count', + "recent_measurements_excluded_few_count", ] # Added in #29591 # NOTE: recent_consensus_count, recent_priority_list_count, @@ -105,29 +118,29 @@ HEADER_RECENT_MEASUREMENTS_EXCLUDED_KEYS = [ HEADER_KEYS_V1_4 = [ # 1.1 header: the number of different consensuses, that sbws has seen, # since the last 5 days - 'recent_consensus_count', + "recent_consensus_count", # 2.4 Number of times a priority list has been created - 'recent_priority_list_count', + "recent_priority_list_count", # 2.5 Number of relays that there were in a priority list # [50, number of relays in the network * 0.05] - 'recent_priority_relay_count', + "recent_priority_relay_count", # 3.6 header: the number of times that sbws has tried to measure any relay, # since the last 5 days # This would be the number of times a relays were in a priority list - 'recent_measurement_attempt_count', + "recent_measurement_attempt_count", # 3.7 header: the number of times that sbws has tried to measure any relay, # since the last 5 days, but it didn't work # This should be the number of attempts - number of ResultSuccess - # something else we don't know yet # So far is the number of ResultError - 'recent_measurement_failure_count', + "recent_measurement_failure_count", # The time it took to report about half of the network. - 'time_to_report_half_network', + "time_to_report_half_network", ] + HEADER_RECENT_MEASUREMENTS_EXCLUDED_KEYS
# Tor version will be obtained from the state file, so it won't be pass as an # argument, but will be self-initialized. -HEADER_KEYS_V1_4_TO_INIT = ['tor_version'] +HEADER_KEYS_V1_4_TO_INIT = ["tor_version"]
# KeyValues that are initialized from arguments, not self-initialized. HEADER_INIT_KEYS = ( @@ -151,7 +164,7 @@ HEADER_UNORDERED_KEYS = ( # List of all the KeyValues currently being used to generate the file HEADER_ALL_KEYS = HEADER_KEYS_V1_1_ORDERED + HEADER_UNORDERED_KEYS
-TERMINATOR = '=====' +TERMINATOR = "====="
# Bandwidth Lines KeyValues # ========================= @@ -160,9 +173,9 @@ NUM_LINES_HEADER_V1 = len(HEADER_ALL_KEYS) + 2 LINE_TERMINATOR = TERMINATOR + LINE_SEP
# KeyValue separator in Bandwidth Lines -BWLINE_KEYVALUES_SEP_V1 = ' ' +BWLINE_KEYVALUES_SEP_V1 = " " # not inclding in the files the extra bws for now -BWLINE_KEYS_V0 = ['node_id', 'bw'] +BWLINE_KEYS_V0 = ["node_id", "bw"] BWLINE_KEYS_V1_1 = [ "master_key_ed25519", "nick", @@ -193,39 +206,38 @@ BWLINE_KEYS_V1_2 = [ BWLINE_KEYS_V1_4 = [ # 1.2 relay: the number of different consensuses, that sbws has seen, # since the last 5 days, that have this relay - 'relay_in_recent_consensus_count', + "relay_in_recent_consensus_count", # 2.6 relay: the number of times a relay was "prioritized" to be measured # in the recent days (by default 5). - 'relay_recent_priority_list_count', + "relay_recent_priority_list_count", # 3.8 relay: the number of times that sbws has tried to measure # this relay, since the last 5 days # This would be the number of times a relay was in a priority list (2.6) # since once it gets measured, it either returns ResultError, # ResultSuccess or something else happened that we don't know yet - 'relay_recent_measurement_attempt_count', + "relay_recent_measurement_attempt_count", # 3.9 relay: the number of times that sbws has tried to measure # this relay, since the last 5 days, but it didn't work # This should be the number of attempts - number of ResultSuccess - # something else we don't know yet # So far is the number of ResultError - 'relay_recent_measurement_failure_count', + "relay_recent_measurement_failure_count", # Number of error results created in the last 5 days that are excluded. # This is the sum of all the errors. - 'relay_recent_measurements_excluded_error_count', + "relay_recent_measurements_excluded_error_count", # The number of successful results, created in the last 5 days, # that were excluded by a rule, for this relay. # 'relay_recent_measurements_excluded_error_count' would be the # sum of the following 3 + the number of error results. - # The number of successful measurements that are not X time away # from each other (by default 1 day). - 'relay_recent_measurements_excluded_near_count', + "relay_recent_measurements_excluded_near_count", # The number of successful measurements that are away from each other # but not X time recent (by default 5 days). - 'relay_recent_measurements_excluded_old_count', + "relay_recent_measurements_excluded_old_count", # The number of measurements excluded because they are not at least X # (by default 2). - 'relay_recent_measurements_excluded_few_count', + "relay_recent_measurements_excluded_few_count", # `vote=0` is used for the relays that were excluded to # be reported in the bandwidth file and now they are # reported. @@ -234,14 +246,16 @@ BWLINE_KEYS_V1_4 = [ # added in case Tor would vote on them in future versions. # Maybe these keys should not be included for the relays # in which vote=1 and unmeasured=0. - 'vote', 'unmeasured', + "vote", + "unmeasured", # When there not enough eligible relays (not excluded) # under_min_report is 1, `vote` is 0. # Added in #29853. - 'under_min_report', + "under_min_report", ] -BWLINE_KEYS_V1 = BWLINE_KEYS_V0 + BWLINE_KEYS_V1_1 + BWLINE_KEYS_V1_2 \ - + BWLINE_KEYS_V1_4 +BWLINE_KEYS_V1 = ( + BWLINE_KEYS_V0 + BWLINE_KEYS_V1_1 + BWLINE_KEYS_V1_2 + BWLINE_KEYS_V1_4 +) # NOTE: tech-debt: assign boolean type to vote and unmeasured, # when the attributes are defined with a type, as stem does. BWLINE_INT_KEYS = ( @@ -257,17 +271,17 @@ BWLINE_INT_KEYS = ( + BWLINE_KEYS_V1_4 ) # This is boolean, not int. -BWLINE_INT_KEYS.remove('consensus_bandwidth_is_unmeasured') +BWLINE_INT_KEYS.remove("consensus_bandwidth_is_unmeasured")
def round_sig_dig(n, digits=PROP276_ROUND_DIG): """Round n to 'digits' significant digits in front of the decimal point. - Results less than or equal to 1 are rounded to 1. - Returns an integer. + Results less than or equal to 1 are rounded to 1. + Returns an integer.
- digits must be greater than 0. - n must be less than or equal to 2**73, to avoid floating point errors. - """ + digits must be greater than 0. + n must be less than or equal to 2**73, to avoid floating point errors. + """ digits = int(digits) assert digits >= 1 if n <= 1: @@ -280,13 +294,13 @@ def round_sig_dig(n, digits=PROP276_ROUND_DIG):
def kb_round_x_sig_dig(bw_bs, digits=PROP276_ROUND_DIG): """Convert bw_bs from bytes to kilobytes, and round the result to - 'digits' significant digits. - Results less than or equal to 1 are rounded up to 1. - Returns an integer. + 'digits' significant digits. + Results less than or equal to 1 are rounded up to 1. + Returns an integer.
- digits must be greater than 0. - n must be less than or equal to 2**82, to avoid floating point errors. - """ + digits must be greater than 0. + n must be less than or equal to 2**82, to avoid floating point errors. + """ # avoid double-rounding by using floating-point bw_kb = bw_bs / 1000.0 return round_sig_dig(bw_kb, digits=digits) @@ -298,7 +312,7 @@ def num_results_of_type(results, type_str):
# Better way to use enums? def result_type_to_key(type_str): - return type_str.replace('-', '_') + return type_str.replace("-", "_")
class V3BWHeader(object): @@ -318,30 +332,39 @@ class V3BWHeader(object): - generator_started: str, ISO 8601 timestamp in UTC time zone when the generator started """ + def __init__(self, timestamp, **kwargs): assert isinstance(timestamp, str) for v in kwargs.values(): assert isinstance(v, str) self.timestamp = timestamp # KeyValues with default value when not given by kwargs - self.version = kwargs.get('version', SPEC_VERSION) - self.software = kwargs.get('software', 'sbws') - self.software_version = kwargs.get('software_version', __version__) - self.file_created = kwargs.get('file_created', now_isodt_str()) + self.version = kwargs.get("version", SPEC_VERSION) + self.software = kwargs.get("software", "sbws") + self.software_version = kwargs.get("software_version", __version__) + self.file_created = kwargs.get("file_created", now_isodt_str()) # latest_bandwidth should not be in kwargs, since it MUST be the # same as timestamp self.latest_bandwidth = unixts_to_isodt_str(timestamp) - [setattr(self, k, v) for k, v in kwargs.items() - if k in HEADER_INIT_KEYS] + [ + setattr(self, k, v) + for k, v in kwargs.items() + if k in HEADER_INIT_KEYS + ]
def __str__(self): - if self.version.startswith('1.'): + if self.version.startswith("1."): return self.strv1 return self.strv2
@classmethod - def from_results(cls, results, scanner_country=None, - destinations_countries=None, state_fpath=''): + def from_results( + cls, + results, + scanner_country=None, + destinations_countries=None, + state_fpath="", + ): kwargs = dict() latest_bandwidth = cls.latest_bandwidth_from_results(results) earliest_bandwidth = cls.earliest_bandwidth_from_results(results) @@ -354,27 +377,29 @@ class V3BWHeader(object): # this state variable. # Store the state as an attribute of the object? state = State(state_fpath) - tor_version = state.get('tor_version', None) + tor_version = state.get("tor_version", None) if tor_version: - kwargs['tor_version'] = tor_version + kwargs["tor_version"] = tor_version
- kwargs['latest_bandwidth'] = unixts_to_isodt_str(latest_bandwidth) - kwargs['earliest_bandwidth'] = unixts_to_isodt_str(earliest_bandwidth) + kwargs["latest_bandwidth"] = unixts_to_isodt_str(latest_bandwidth) + kwargs["earliest_bandwidth"] = unixts_to_isodt_str(earliest_bandwidth) if generator_started is not None: - kwargs['generator_started'] = generator_started + kwargs["generator_started"] = generator_started # To be compatible with older bandwidth files, do not require it. if scanner_country is not None: - kwargs['scanner_country'] = scanner_country + kwargs["scanner_country"] = scanner_country if destinations_countries is not None: - kwargs['destinations_countries'] = destinations_countries + kwargs["destinations_countries"] = destinations_countries if recent_consensus_count is not None: - kwargs['recent_consensus_count'] = recent_consensus_count + kwargs["recent_consensus_count"] = recent_consensus_count
- recent_measurement_attempt_count = \ + recent_measurement_attempt_count = ( cls.recent_measurement_attempt_count_from_file(state_fpath) + ) if recent_measurement_attempt_count is not None: - kwargs['recent_measurement_attempt_count'] = \ - str(recent_measurement_attempt_count) + kwargs["recent_measurement_attempt_count"] = str( + recent_measurement_attempt_count + )
# If it is a failure that is not a ResultError, then # failures = attempts - all mesaurements @@ -385,19 +410,22 @@ class V3BWHeader(object): all_measurements = 0 for result_list in results.values(): all_measurements += len(result_list) - measurement_failures = (recent_measurement_attempt_count - - all_measurements) - kwargs['recent_measurement_failure_count'] = \ - str(measurement_failures) + measurement_failures = ( + recent_measurement_attempt_count - all_measurements + ) + kwargs["recent_measurement_failure_count"] = str( + measurement_failures + )
priority_lists = cls.recent_priority_list_count_from_file(state_fpath) if priority_lists is not None: - kwargs['recent_priority_list_count'] = str(priority_lists) + kwargs["recent_priority_list_count"] = str(priority_lists)
- priority_relays = \ - cls.recent_priority_relay_count_from_file(state_fpath) + priority_relays = cls.recent_priority_relay_count_from_file( + state_fpath + ) if priority_relays is not None: - kwargs['recent_priority_relay_count'] = str(priority_relays) + kwargs["recent_priority_relay_count"] = str(priority_relays)
h = cls(timestamp, **kwargs) return h @@ -413,15 +441,19 @@ class V3BWHeader(object): index_terminator = lines.index(TERMINATOR) except ValueError: # is not a bw file or is v100 - log.warn('Terminator is not in lines') + log.warn("Terminator is not in lines") return None ts = lines[0] - kwargs = dict([l.split(KEYVALUE_SEP_V1) - for l in lines[:index_terminator] - if l.split(KEYVALUE_SEP_V1)[0] in HEADER_ALL_KEYS]) + kwargs = dict( + [ + l.split(KEYVALUE_SEP_V1) + for l in lines[:index_terminator] + if l.split(KEYVALUE_SEP_V1)[0] in HEADER_ALL_KEYS + ] + ) h = cls(ts, **kwargs) # last line is new line - return h, lines[index_terminator + 1:-1] + return h, lines[index_terminator + 1 : -1]
@classmethod def from_text_v1(self, text): @@ -445,14 +477,14 @@ class V3BWHeader(object):
@staticmethod def generator_started_from_file(state_fpath): - ''' + """ ISO formatted timestamp for the time when the scanner process most recently started. - ''' + """ state = State(state_fpath) - if 'scanner_started' in state: + if "scanner_started" in state: # From v1.1.0-dev `state` is capable of converting strs to datetime - return dt_obj_to_isodt_str(state['scanner_started']) + return dt_obj_to_isodt_str(state["scanner_started"]) else: return None
@@ -472,7 +504,7 @@ class V3BWHeader(object): in the recent (by default 5) days from the state file. """ state = State(state_fpath) - return state.count('recent_measurement_attempt') + return state.count("recent_measurement_attempt")
@staticmethod def recent_priority_list_count_from_file(state_fpath): @@ -483,7 +515,7 @@ class V3BWHeader(object): in the recent (by default 5) days from the state file. """ state = State(state_fpath) - return state.count('recent_priority_list') + return state.count("recent_priority_list")
@staticmethod def recent_priority_relay_count_from_file(state_fpath): @@ -492,7 +524,7 @@ class V3BWHeader(object): in the recent (by default 5) days from the state file. """ state = State(state_fpath) - return state.count('recent_priority_relay') + return state.count("recent_priority_relay")
@staticmethod def latest_bandwidth_from_results(results): @@ -506,41 +538,50 @@ class V3BWHeader(object): def keyvalue_unordered_tuple_ls(self): """Return list of KeyValue tuples that do not have specific order.""" # sort the list to generate determinist headers - keyvalue_tuple_ls = sorted([(k, v) for k, v in self.__dict__.items() - if k in HEADER_UNORDERED_KEYS]) + keyvalue_tuple_ls = sorted( + [ + (k, v) + for k, v in self.__dict__.items() + if k in HEADER_UNORDERED_KEYS + ] + ) return keyvalue_tuple_ls
@property def keyvalue_tuple_ls(self): """Return list of all KeyValue tuples""" - return [('version', self.version)] + self.keyvalue_unordered_tuple_ls + return [("version", self.version)] + self.keyvalue_unordered_tuple_ls
@property def keyvalue_v1str_ls(self): """Return KeyValue list of strings following spec v1.X.X.""" - keyvalues = [self.timestamp] + [KEYVALUE_SEP_V1.join([k, v]) - for k, v in self.keyvalue_tuple_ls] + keyvalues = [self.timestamp] + [ + KEYVALUE_SEP_V1.join([k, v]) for k, v in self.keyvalue_tuple_ls + ] return keyvalues
@property def strv1(self): """Return header string following spec v1.X.X.""" - header_str = LINE_SEP.join(self.keyvalue_v1str_ls) + LINE_SEP + \ - LINE_TERMINATOR + header_str = ( + LINE_SEP.join(self.keyvalue_v1str_ls) + LINE_SEP + LINE_TERMINATOR + ) return header_str
@property def keyvalue_v2_ls(self): """Return KeyValue list of strings following spec v2.X.X.""" - keyvalue = [self.timestamp] + [KEYVALUE_SEP_V2.join([k, v]) - for k, v in self.keyvalue_tuple_ls] + keyvalue = [self.timestamp] + [ + KEYVALUE_SEP_V2.join([k, v]) for k, v in self.keyvalue_tuple_ls + ] return keyvalue
@property def strv2(self): """Return header string following spec v2.X.X.""" - header_str = LINE_SEP.join(self.keyvalue_v2_ls) + LINE_SEP + \ - LINE_TERMINATOR + header_str = ( + LINE_SEP.join(self.keyvalue_v2_ls) + LINE_SEP + LINE_TERMINATOR + ) return header_str
@property @@ -549,8 +590,11 @@ class V3BWHeader(object):
def add_stats(self, **kwargs): # Using kwargs because attributes might chage. - [setattr(self, k, str(v)) for k, v in kwargs.items() - if k in HEADER_KEYS_V1_2] + [ + setattr(self, k, str(v)) + for k, v in kwargs.items() + if k in HEADER_KEYS_V1_2 + ]
def add_time_report_half_network(self): """Add to the header the time it took to measure half of the network. @@ -579,15 +623,15 @@ class V3BWHeader(object): # writing to the file, so that they do not need to be converted back # to do some calculations. elapsed_time = ( - (isostr_to_dt_obj(self.latest_bandwidth) - - isostr_to_dt_obj(self.earliest_bandwidth)) - .total_seconds()) + isostr_to_dt_obj(self.latest_bandwidth) + - isostr_to_dt_obj(self.earliest_bandwidth) + ).total_seconds()
# This attributes were added later and some tests that # do not initialize them would fail. - eligible_relays = int(getattr(self, 'number_eligible_relays', 0)) - consensus_relays = int(getattr(self, 'number_consensus_relays', 0)) - if not(eligible_relays and consensus_relays): + eligible_relays = int(getattr(self, "number_eligible_relays", 0)) + consensus_relays = int(getattr(self, "number_consensus_relays", 0)) + if not (eligible_relays and consensus_relays): return
half_network = consensus_relays / 2 @@ -602,8 +646,10 @@ class V3BWHeader(object): estimated_time = round( elapsed_time * consensus_relays / eligible_relays ) - log.info("Estimated time to measure the network: %s hours.", - round(estimated_time / 60 / 60)) + log.info( + "Estimated time to measure the network: %s hours.", + round(estimated_time / 60 / 60), + )
def add_relays_excluded_counters(self, exclusion_dict): """ @@ -627,22 +673,32 @@ class V3BWLine(object): .. note:: tech-debt: move node_id and bw to kwargs and just ensure that the required values are in ``**kwargs`` """ + def __init__(self, node_id, bw, **kwargs): assert isinstance(node_id, str) - assert node_id.startswith('$') + assert node_id.startswith("$") self.node_id = node_id self.bw = bw # For now, we do not want to add ``bw_filt`` to the bandwidth file, # therefore it is set here but not added to ``BWLINE_KEYS_V1``. - [setattr(self, k, v) for k, v in kwargs.items() - if k in BWLINE_KEYS_V1 + ["bw_filt"]] + [ + setattr(self, k, v) + for k, v in kwargs.items() + if k in BWLINE_KEYS_V1 + ["bw_filt"] + ]
def __str__(self): return self.bw_strv1
@classmethod - def from_results(cls, results, secs_recent=None, secs_away=None, - min_num=0, router_statuses_d=None): + def from_results( + cls, + results, + secs_recent=None, + secs_away=None, + min_num=0, + router_statuses_d=None, + ): """Convert sbws results to relays' Bandwidth Lines
``bs`` stands for Bytes/seconds @@ -653,21 +709,23 @@ class V3BWLine(object): bw = data (Bytes) / time (seconds) """ # log.debug("Len success_results %s", len(success_results)) - node_id = '$' + results[0].fingerprint + node_id = "$" + results[0].fingerprint kwargs = dict() - kwargs['nick'] = results[0].nickname - if getattr(results[0], 'master_key_ed25519'): - kwargs['master_key_ed25519'] = results[0].master_key_ed25519 - kwargs['time'] = cls.last_time_from_results(results) + kwargs["nick"] = results[0].nickname + if getattr(results[0], "master_key_ed25519"): + kwargs["master_key_ed25519"] = results[0].master_key_ed25519 + kwargs["time"] = cls.last_time_from_results(results) kwargs.update(cls.result_types_from_results(results))
# If it has not the attribute, return list to be able to call len # If it has the attribute, but it is None, return also list - kwargs['relay_in_recent_consensus_count'] = str( - max([ - len(getattr(r, 'relay_in_recent_consensus', []) or []) - for r in results - ]) + kwargs["relay_in_recent_consensus_count"] = str( + max( + [ + len(getattr(r, "relay_in_recent_consensus", []) or []) + for r in results + ] + ) )
# Workaround for #34309. @@ -717,29 +775,29 @@ class V3BWLine(object): # reason. # This could be done in a better way as part of a refactor #28684.
- kwargs['vote'] = 0 - kwargs['unmeasured'] = 1 + kwargs["vote"] = 0 + kwargs["unmeasured"] = 1
exclusion_reason = None
number_excluded_error = len(results) - len(success_results) if number_excluded_error > 0: # then the number of error results is the number of results - kwargs['relay_recent_measurements_excluded_error_count'] = \ - number_excluded_error + kwargs[ + "relay_recent_measurements_excluded_error_count" + ] = number_excluded_error if not success_results: - exclusion_reason = 'recent_measurements_excluded_error_count' + exclusion_reason = "recent_measurements_excluded_error_count" return (cls(node_id, 1, **kwargs), exclusion_reason)
- results_away = \ - cls.results_away_each_other(success_results, secs_away) + results_away = cls.results_away_each_other(success_results, secs_away) number_excluded_near = len(success_results) - len(results_away) if number_excluded_near > 0: - kwargs['relay_recent_measurements_excluded_near_count'] = \ - number_excluded_near + kwargs[ + "relay_recent_measurements_excluded_near_count" + ] = number_excluded_near if not results_away: - exclusion_reason = \ - 'recent_measurements_excluded_near_count' + exclusion_reason = "recent_measurements_excluded_near_count" return (cls(node_id, 1, **kwargs), exclusion_reason) # log.debug("Results away from each other: %s", # [unixts_to_isodt_str(r.time) for r in results_away]) @@ -747,71 +805,72 @@ class V3BWLine(object): results_recent = cls.results_recent_than(results_away, secs_recent) number_excluded_old = len(results_away) - len(results_recent) if number_excluded_old > 0: - kwargs['relay_recent_measurements_excluded_old_count'] = \ - number_excluded_old + kwargs[ + "relay_recent_measurements_excluded_old_count" + ] = number_excluded_old if not results_recent: - exclusion_reason = \ - 'recent_measurements_excluded_old_count' + exclusion_reason = "recent_measurements_excluded_old_count" return (cls(node_id, 1, **kwargs), exclusion_reason)
if not len(results_recent) >= min_num: - kwargs['relay_recent_measurements_excluded_few_count'] = \ - len(results_recent) + kwargs["relay_recent_measurements_excluded_few_count"] = len( + results_recent + ) # log.debug('The number of results is less than %s', min_num) - exclusion_reason = \ - 'recent_measurements_excluded_few_count' + exclusion_reason = "recent_measurements_excluded_few_count" return (cls(node_id, 1, **kwargs), exclusion_reason)
# Use the last consensus if available, since the results' consensus # values come from the moment the measurement was made. if router_statuses_d and node_id in router_statuses_d: - consensus_bandwidth = \ - router_statuses_d[node_id].bandwidth * 1000 - consensus_bandwidth_is_unmeasured = \ - router_statuses_d[node_id].is_unmeasured + consensus_bandwidth = router_statuses_d[node_id].bandwidth * 1000 + consensus_bandwidth_is_unmeasured = router_statuses_d[ + node_id + ].is_unmeasured else: - consensus_bandwidth = \ - cls.consensus_bandwidth_from_results(results_recent) - consensus_bandwidth_is_unmeasured = \ + consensus_bandwidth = cls.consensus_bandwidth_from_results( + results_recent + ) + consensus_bandwidth_is_unmeasured = ( cls.consensus_bandwidth_is_unmeasured_from_results( - results_recent) + results_recent + ) + ) # If there is no last observed bandwidth, there won't be mean either. - desc_bw_obs_last = \ - cls.desc_bw_obs_last_from_results(results_recent) + desc_bw_obs_last = cls.desc_bw_obs_last_from_results(results_recent)
# Exclude also relays without consensus bandwidth nor observed # bandwidth, since they can't be scaled - if (desc_bw_obs_last is None and consensus_bandwidth is None): + if desc_bw_obs_last is None and consensus_bandwidth is None: # This reason is not counted, not added in the file, but it will # have vote = 0 - return(cls(node_id, 1), "no_consensus_no_observed_bw") + return (cls(node_id, 1), "no_consensus_no_observed_bw")
# For any line not excluded, do not include vote and unmeasured # KeyValues - del kwargs['vote'] - del kwargs['unmeasured'] + del kwargs["vote"] + del kwargs["unmeasured"]
rtt = cls.rtt_from_results(results_recent) if rtt: - kwargs['rtt'] = rtt + kwargs["rtt"] = rtt bw = cls.bw_median_from_results(results_recent) # XXX: all the class functions could use the bw_measurements instead of # obtaining them each time or use a class Measurements. bw_measurements = scaling.bw_measurements_from_results(results_recent) - kwargs['bw_mean'] = cls.bw_mean_from_results(results_recent) - kwargs['bw_filt'] = scaling.bw_filt(bw_measurements) - kwargs['bw_median'] = cls.bw_median_from_results( - results_recent) - kwargs['desc_bw_avg'] = \ - cls.desc_bw_avg_from_results(results_recent) - kwargs['desc_bw_bur'] = \ - cls.desc_bw_bur_from_results(results_recent) - kwargs['consensus_bandwidth'] = consensus_bandwidth - kwargs['consensus_bandwidth_is_unmeasured'] = \ - consensus_bandwidth_is_unmeasured - kwargs['desc_bw_obs_last'] = desc_bw_obs_last - kwargs['desc_bw_obs_mean'] = \ - cls.desc_bw_obs_mean_from_results(results_recent) + kwargs["bw_mean"] = cls.bw_mean_from_results(results_recent) + kwargs["bw_filt"] = scaling.bw_filt(bw_measurements) + kwargs["bw_median"] = cls.bw_median_from_results(results_recent) + kwargs["desc_bw_avg"] = cls.desc_bw_avg_from_results(results_recent) + kwargs["desc_bw_bur"] = cls.desc_bw_bur_from_results(results_recent) + kwargs["consensus_bandwidth"] = consensus_bandwidth + kwargs[ + "consensus_bandwidth_is_unmeasured" + ] = consensus_bandwidth_is_unmeasured + kwargs["desc_bw_obs_last"] = desc_bw_obs_last + kwargs["desc_bw_obs_mean"] = cls.desc_bw_obs_mean_from_results( + results_recent + )
bwl = cls(node_id, bw, **kwargs) return bwl, None @@ -824,16 +883,20 @@ class V3BWLine(object): @classmethod def from_bw_line_v1(cls, line): assert isinstance(line, str) - kwargs = dict([kv.split(KEYVALUE_SEP_V1) - for kv in line.split(BWLINE_KEYVALUES_SEP_V1) - if kv.split(KEYVALUE_SEP_V1)[0] in BWLINE_KEYS_V1]) + kwargs = dict( + [ + kv.split(KEYVALUE_SEP_V1) + for kv in line.split(BWLINE_KEYVALUES_SEP_V1) + if kv.split(KEYVALUE_SEP_V1)[0] in BWLINE_KEYS_V1 + ] + ) for k, v in kwargs.items(): if k in BWLINE_INT_KEYS: kwargs[k] = int(v) - node_id = kwargs['node_id'] - bw = kwargs['bw'] - del kwargs['node_id'] - del kwargs['bw'] + node_id = kwargs["node_id"] + bw = kwargs["bw"] + del kwargs["node_id"] + del kwargs["bw"] bw_line = cls(node_id, bw, **kwargs) return bw_line
@@ -854,9 +917,9 @@ class V3BWLine(object): def results_recent_than(results, secs_recent=None): if secs_recent is None: return results - results_recent = list(filter( - lambda x: (now_unixts() - x.time) < secs_recent, - results)) + results_recent = list( + filter(lambda x: (now_unixts() - x.time) < secs_recent, results) + ) # if not results_recent: # log.debug("Results are NOT more recent than %ss: %s", # secs_recent, @@ -865,16 +928,22 @@ class V3BWLine(object):
@staticmethod def bw_median_from_results(results): - bws = [dl['amount'] / dl['duration'] - for r in results for dl in r.downloads] + bws = [ + dl["amount"] / dl["duration"] + for r in results + for dl in r.downloads + ] if bws: return max(round(median(bws)), 1) return 1
@staticmethod def bw_mean_from_results(results): - bws = [dl['amount'] / dl['duration'] - for r in results for dl in r.downloads] + bws = [ + dl["amount"] / dl["duration"] + for r in results + for dl in r.downloads + ] # It's safe to return 0 here, because: # 1. this value will be the numerator when calculating the ratio. # 2. `kb_round_x_sig_dig` returns a minimum of 1. @@ -895,9 +964,15 @@ class V3BWLine(object):
@staticmethod def result_types_from_results(results): - rt_dict = dict([(result_type_to_key(rt.value), - num_results_of_type(results, rt.value)) - for rt in _ResultType]) + rt_dict = dict( + [ + ( + result_type_to_key(rt.value), + num_results_of_type(results, rt.value), + ) + for rt in _ResultType + ] + ) return rt_dict
@staticmethod @@ -960,8 +1035,9 @@ class V3BWLine(object): def bw_keyvalue_tuple_ls(self): """Return list of KeyValue Bandwidth Line tuples.""" # sort the list to generate determinist headers - keyvalue_tuple_ls = sorted([(k, v) for k, v in self.__dict__.items() - if k in BWLINE_KEYS_V1]) + keyvalue_tuple_ls = sorted( + [(k, v) for k, v in self.__dict__.items() if k in BWLINE_KEYS_V1] + ) return keyvalue_tuple_ls
@property @@ -969,20 +1045,26 @@ class V3BWLine(object): """Return list of KeyValue Bandwidth Line strings following spec v1.X.X. """ - bw_keyvalue_str = [KEYVALUE_SEP_V1 .join([k, str(v)]) - for k, v in self.bw_keyvalue_tuple_ls] + bw_keyvalue_str = [ + KEYVALUE_SEP_V1.join([k, str(v)]) + for k, v in self.bw_keyvalue_tuple_ls + ] return bw_keyvalue_str
@property def bw_strv1(self): """Return Bandwidth Line string following spec v1.X.X.""" - bw_line_str = BWLINE_KEYVALUES_SEP_V1.join( - self.bw_keyvalue_v1str_ls) + LINE_SEP + bw_line_str = ( + BWLINE_KEYVALUES_SEP_V1.join(self.bw_keyvalue_v1str_ls) + LINE_SEP + ) if len(bw_line_str) > BW_LINE_SIZE: # if this is the case, probably there are too many KeyValues, # or the limit needs to be changed in Tor - log.warn("The bandwidth line %s is longer than %s", - len(bw_line_str), BW_LINE_SIZE) + log.warn( + "The bandwidth line %s is longer than %s", + len(bw_line_str), + BW_LINE_SIZE, + ) return bw_line_str
def set_relay_type(self, relay_type): @@ -991,6 +1073,7 @@ class V3BWLine(object): def del_relay_type(self): delattr(self, "relay_type")
+ class V3BWFile(object): """ Create a Bandwidth List file following spec version 1.X.X @@ -998,25 +1081,35 @@ class V3BWFile(object): :param V3BWHeader v3bwheader: header :param list v3bwlines: V3BWLines """ + def __init__(self, v3bwheader, v3bwlines): self.header = v3bwheader self.bw_lines = v3bwlines
def __str__(self): - return str(self.header) + ''.join([str(bw_line) or '' - for bw_line in self.bw_lines]) + return str(self.header) + "".join( + [str(bw_line) or "" for bw_line in self.bw_lines] + )
@classmethod - def from_results(cls, results, scanner_country=None, - destinations_countries=None, state_fpath='', - scale_constant=SBWS_SCALE_CONSTANT, - scaling_method=TORFLOW_SCALING, - torflow_obs=TORFLOW_OBS_LAST, - torflow_cap=TORFLOW_BW_MARGIN, - round_digs=PROP276_ROUND_DIG, - secs_recent=None, secs_away=None, min_num=0, - consensus_path=None, max_bw_diff_perc=MAX_BW_DIFF_PERC, - reverse=False): + def from_results( + cls, + results, + scanner_country=None, + destinations_countries=None, + state_fpath="", + scale_constant=SBWS_SCALE_CONSTANT, + scaling_method=TORFLOW_SCALING, + torflow_obs=TORFLOW_OBS_LAST, + torflow_cap=TORFLOW_BW_MARGIN, + round_digs=PROP276_ROUND_DIG, + secs_recent=None, + secs_away=None, + min_num=0, + consensus_path=None, + max_bw_diff_perc=MAX_BW_DIFF_PERC, + reverse=False, + ): """Create V3BWFile class from sbws Results.
:param dict results: see below @@ -1034,15 +1127,17 @@ class V3BWFile(object): 'relay_fp2': [Result1, Result2, ...]}
""" - log.info('Processing results to generate a bandwidth list file.') - header = V3BWHeader.from_results(results, scanner_country, - destinations_countries, state_fpath) + log.info("Processing results to generate a bandwidth list file.") + header = V3BWHeader.from_results( + results, scanner_country, destinations_countries, state_fpath + ) bw_lines_raw = [] bw_lines_excluded = [] router_statuses_d = cls.read_router_statuses(consensus_path) # XXX: Use router_statuses_d to not parse again the file. - number_consensus_relays = \ - cls.read_number_consensus_relays(consensus_path) + number_consensus_relays = cls.read_number_consensus_relays( + consensus_path + ) state = State(state_fpath)
# Create a dictionary with the number of relays excluded by any of the @@ -1052,12 +1147,12 @@ class V3BWFile(object): # See also the comments in `from_results`. exclusion_dict = dict( [(k, 0) for k in HEADER_RECENT_MEASUREMENTS_EXCLUDED_KEYS] - ) + ) for fp, values in results.items(): # log.debug("Relay fp %s", fp) - line, reason = V3BWLine.from_results(values, secs_recent, - secs_away, min_num, - router_statuses_d) + line, reason = V3BWLine.from_results( + values, secs_recent, secs_away, min_num, router_statuses_d + ) # If there is no reason it means the line will not be excluded. if not reason: bw_lines_raw.append(line) @@ -1072,11 +1167,12 @@ class V3BWFile(object): if not bw_lines_raw: # It could be possible to scale the lines that were successful # even if excluded, but is not done here. - log.info("After applying restrictions to the raw results, " - "there is not any. Scaling can not be applied.") + log.info( + "After applying restrictions to the raw results, " + "there is not any. Scaling can not be applied." + ) # Update the header and log the progress. - cls.update_progress( - cls, 0, header, number_consensus_relays, state) + cls.update_progress(cls, 0, header, number_consensus_relays, state) # Set the lines that would be excluded anyway (`vote=0`) with # `under_min_report=1` cls.set_under_min_report(bw_lines_excluded) @@ -1088,14 +1184,17 @@ class V3BWFile(object): # log.debug(bw_lines[-1]) elif scaling_method == TORFLOW_SCALING: bw_lines = cls.bw_torflow_scale( - bw_lines_raw, torflow_obs, torflow_cap, round_digs, - router_statuses_d=router_statuses_d + bw_lines_raw, + torflow_obs, + torflow_cap, + round_digs, + router_statuses_d=router_statuses_d, ) # log.debug(bw_lines[-1]) # Update the header and log the progress. min_perc = cls.update_progress( cls, len(bw_lines), header, number_consensus_relays, state - ) + ) # If after scaling the number of lines is less than the percentage # of lines to report, set them with `under_min_report`. if not min_perc: @@ -1113,7 +1212,7 @@ class V3BWFile(object):
@classmethod def from_v1_fpath(cls, fpath): - log.info('Parsing bandwidth file %s', fpath) + log.info("Parsing bandwidth file %s", fpath) with open(fpath) as fd: text = fd.read() all_lines = text.split(LINE_SEP) @@ -1123,13 +1222,14 @@ class V3BWFile(object):
@classmethod def from_v100_fpath(cls, fpath): - log.info('Parsing bandwidth file %s', fpath) + log.info("Parsing bandwidth file %s", fpath) with open(fpath) as fd: text = fd.read() all_lines = text.split(LINE_SEP) header, lines = V3BWHeader.from_lines_v100(all_lines) - bw_lines = sorted([V3BWLine.from_bw_line_v1(l) for l in lines], - key=lambda l: l.bw) + bw_lines = sorted( + [V3BWLine.from_bw_line_v1(l) for l in lines], key=lambda l: l.bw + ) return cls(header, bw_lines)
@staticmethod @@ -1151,8 +1251,9 @@ class V3BWFile(object): return sorted(bw_lines_scaled, key=lambda x: x.bw, reverse=reverse)
@staticmethod - def bw_sbws_scale(bw_lines, scale_constant=SBWS_SCALE_CONSTANT, - reverse=False): + def bw_sbws_scale( + bw_lines, scale_constant=SBWS_SCALE_CONSTANT, reverse=False + ): """Return a new V3BwLine list scaled using sbws method.
:param list bw_lines: @@ -1164,42 +1265,55 @@ class V3BWFile(object): the bandwidth to obtain the new bandwidth :returns list: V3BwLine list """ - log.debug('Scaling bandwidth using sbws method.') + log.debug("Scaling bandwidth using sbws method.") m = median([l.bw for l in bw_lines]) bw_lines_scaled = copy.deepcopy(bw_lines) for l in bw_lines_scaled: # min is to limit the bw to descriptor average-bandwidth # max to avoid bandwidth with 0 value - l.bw = max(round(min(l.desc_bw_avg, - l.bw * scale_constant / m) - / 1000), 1) + l.bw = max( + round(min(l.desc_bw_avg, l.bw * scale_constant / m) / 1000), 1 + ) return sorted(bw_lines_scaled, key=lambda x: x.bw, reverse=reverse)
@staticmethod - def warn_if_not_accurate_enough(bw_lines, - scale_constant=SBWS_SCALE_CONSTANT): + def warn_if_not_accurate_enough( + bw_lines, scale_constant=SBWS_SCALE_CONSTANT + ): margin = 0.001 accuracy_ratio = median([l.bw for l in bw_lines]) / scale_constant - log.info('The generated lines are within {:.5}% of what they should ' - 'be'.format((1 - accuracy_ratio) * 100)) + log.info( + "The generated lines are within {:.5}% of what they should " + "be".format((1 - accuracy_ratio) * 100) + ) if accuracy_ratio < 1 - margin or accuracy_ratio > 1 + margin: - log.warning('There was %f%% error and only +/- %f%% is ' - 'allowed', (1 - accuracy_ratio) * 100, margin * 100) + log.warning( + "There was %f%% error and only +/- %f%% is " "allowed", + (1 - accuracy_ratio) * 100, + margin * 100, + )
@staticmethod - def is_max_bw_diff_perc_reached(bw_lines, - max_bw_diff_perc=MAX_BW_DIFF_PERC, - router_statuses_d=None): + def is_max_bw_diff_perc_reached( + bw_lines, max_bw_diff_perc=MAX_BW_DIFF_PERC, router_statuses_d=None + ): if router_statuses_d: - sum_consensus_bw = sum(list(map( - lambda x: x.bandwidth * 1000, - router_statuses_d.values() - ))) + sum_consensus_bw = sum( + list( + map( + lambda x: x.bandwidth * 1000, + router_statuses_d.values(), + ) + ) + ) else: - sum_consensus_bw = sum([ - l.consensus_bandwidth for l in bw_lines - if getattr(l, 'consensus_bandwidth', None) - ]) + sum_consensus_bw = sum( + [ + l.consensus_bandwidth + for l in bw_lines + if getattr(l, "consensus_bandwidth", None) + ] + ) # Because the scaled bandwidth is in KB, but not the stored consensus # bandwidth, multiply by 1000. # Do not count the bandwidths for the relays that were excluded @@ -1209,20 +1323,28 @@ class V3BWFile(object): abs(sum_consensus_bw - sum_bw) # Avoid ZeroDivisionError / (max(1, (sum_consensus_bw + sum_bw)) / 2) - ) * 100 - log.info("The difference between the total consensus bandwidth (%s)" - "and the total measured bandwidth (%s) is %s%%.", - sum_consensus_bw, sum_bw, round(diff_perc)) + ) * 100 + log.info( + "The difference between the total consensus bandwidth (%s)" + "and the total measured bandwidth (%s) is %s%%.", + sum_consensus_bw, + sum_bw, + round(diff_perc), + ) if diff_perc > MAX_BW_DIFF_PERC: log.warning("It is more than %s%%", max_bw_diff_perc) return True return False
@staticmethod - def bw_torflow_scale(bw_lines, desc_bw_obs_type=TORFLOW_OBS_MEAN, - cap=TORFLOW_BW_MARGIN, - num_round_dig=PROP276_ROUND_DIG, reverse=False, - router_statuses_d=None): + def bw_torflow_scale( + bw_lines, + desc_bw_obs_type=TORFLOW_OBS_MEAN, + cap=TORFLOW_BW_MARGIN, + num_round_dig=PROP276_ROUND_DIG, + reverse=False, + router_statuses_d=None, + ): """ Obtain final bandwidth measurements applying Torflow's scaling method. @@ -1234,8 +1356,8 @@ class V3BWFile(object): mu_type, muf_type = scaling.network_means_by_relay_type( bw_lines_tf, router_statuses_d ) - log.debug('mu %s', mu_type) - log.debug('muf %s', muf_type) + log.debug("mu %s", mu_type) + log.debug("muf %s", muf_type)
# Torflow's ``tot_net_bw``, sum of the scaled bandwidth for the relays # that are in the last consensus @@ -1283,16 +1405,20 @@ class V3BWFile(object): desc_bw = desc_bw_obs # If the relay is unmeasured and consensus bandwidth is None or # 0, use the descriptor bandwidth - if l.consensus_bandwidth_is_unmeasured \ - or not l.consensus_bandwidth: + if ( + l.consensus_bandwidth_is_unmeasured + or not l.consensus_bandwidth + ): min_bandwidth = desc_bw_obs else: min_bandwidth = min(desc_bw, l.consensus_bandwidth) elif l.consensus_bandwidth is not None: min_bandwidth = l.consensus_bandwidth else: - log.warning("Can not scale relay missing descriptor and" - " consensus bandwidth.") + log.warning( + "Can not scale relay missing descriptor and" + " consensus bandwidth." + ) continue
# Torflow's scaling @@ -1334,9 +1460,11 @@ class V3BWFile(object): try: num = len(list(parse_file(consensus_path))) except (FileNotFoundError, AttributeError): - log.info("It is not possible to obtain statistics about the " - "percentage of measured relays because the cached " - "consensus file is not found.") + log.info( + "It is not possible to obtain statistics about the " + "percentage of measured relays because the cached " + "consensus file is not found." + ) log.debug("Number of relays in the network %s", num) return num
@@ -1345,19 +1473,22 @@ class V3BWFile(object): """Read the router statuses from the cached consensus file.""" router_statuses_d = None try: - router_statuses_d = dict([ - (r.fingerprint, r) - for r in parse_file(consensus_path) - ]) + router_statuses_d = dict( + [(r.fingerprint, r) for r in parse_file(consensus_path)] + ) except (FileNotFoundError, AttributeError): - log.warning("It is not possible to obtain the last consensus" - "cached file %s.", consensus_path) + log.warning( + "It is not possible to obtain the last consensus" + "cached file %s.", + consensus_path, + ) return router_statuses_d
@staticmethod - def measured_progress_stats(num_bw_lines, number_consensus_relays, - min_perc_reached_before): - """ Statistics about measurements progress, + def measured_progress_stats( + num_bw_lines, number_consensus_relays, min_perc_reached_before + ): + """Statistics about measurements progress, to be included in the header.
:param list bw_lines: the bw_lines after scaling and applying filters. @@ -1375,38 +1506,49 @@ class V3BWFile(object): assert isinstance(number_consensus_relays, int) assert isinstance(num_bw_lines, int) statsd = {} - statsd['number_eligible_relays'] = num_bw_lines - statsd['number_consensus_relays'] = number_consensus_relays - statsd['minimum_number_eligible_relays'] = round( - statsd['number_consensus_relays'] * MIN_REPORT / 100) - statsd['percent_eligible_relays'] = round( - num_bw_lines * 100 / statsd['number_consensus_relays']) - statsd['minimum_percent_eligible_relays'] = MIN_REPORT - if statsd['number_eligible_relays'] < \ - statsd['minimum_number_eligible_relays']: + statsd["number_eligible_relays"] = num_bw_lines + statsd["number_consensus_relays"] = number_consensus_relays + statsd["minimum_number_eligible_relays"] = round( + statsd["number_consensus_relays"] * MIN_REPORT / 100 + ) + statsd["percent_eligible_relays"] = round( + num_bw_lines * 100 / statsd["number_consensus_relays"] + ) + statsd["minimum_percent_eligible_relays"] = MIN_REPORT + if ( + statsd["number_eligible_relays"] + < statsd["minimum_number_eligible_relays"] + ): # if min percent was was reached before, warn # otherwise, debug if min_perc_reached_before is not None: - log.warning('The percentage of the measured relays is less ' - 'than the %s%% of the relays in the network (%s).', - MIN_REPORT, statsd['number_consensus_relays']) + log.warning( + "The percentage of the measured relays is less " + "than the %s%% of the relays in the network (%s).", + MIN_REPORT, + statsd["number_consensus_relays"], + ) else: - log.info('The percentage of the measured relays is less ' - 'than the %s%% of the relays in the network (%s).', - MIN_REPORT, statsd['number_consensus_relays']) + log.info( + "The percentage of the measured relays is less " + "than the %s%% of the relays in the network (%s).", + MIN_REPORT, + statsd["number_consensus_relays"], + ) return statsd, False return statsd, True
@property def is_min_perc(self): - if getattr(self.header, 'number_eligible_relays', 0) \ - < getattr(self.header, 'minimum_number_eligible_relays', 0): + if getattr(self.header, "number_eligible_relays", 0) < getattr( + self.header, "minimum_number_eligible_relays", 0 + ): return False return True
@property def sum_bw(self): - return sum([l.bw for l in self.bw_lines if hasattr(l, 'bw')]) + return sum([l.bw for l in self.bw_lines if hasattr(l, "bw")])
@property def num(self): @@ -1414,48 +1556,58 @@ class V3BWFile(object):
@property def mean_bw(self): - return mean([l.bw for l in self.bw_lines if hasattr(l, 'bw')]) + return mean([l.bw for l in self.bw_lines if hasattr(l, "bw")])
@property def median_bw(self): - return median([l.bw for l in self.bw_lines if hasattr(l, 'bw')]) + return median([l.bw for l in self.bw_lines if hasattr(l, "bw")])
@property def max_bw(self): - return max([l.bw for l in self.bw_lines if hasattr(l, 'bw')]) + return max([l.bw for l in self.bw_lines if hasattr(l, "bw")])
@property def min_bw(self): - return min([l.bw for l in self.bw_lines if hasattr(l, 'bw')]) + return min([l.bw for l in self.bw_lines if hasattr(l, "bw")])
@property def info_stats(self): if not self.bw_lines: return - [log.info(': '.join([attr, str(getattr(self, attr))])) for attr in - ['sum_bw', 'mean_bw', 'median_bw', 'num', - 'max_bw', 'min_bw']] - - def update_progress(self, num_bw_lines, header, number_consensus_relays, - state): + [ + log.info(": ".join([attr, str(getattr(self, attr))])) + for attr in [ + "sum_bw", + "mean_bw", + "median_bw", + "num", + "max_bw", + "min_bw", + ] + ] + + def update_progress( + self, num_bw_lines, header, number_consensus_relays, state + ): """ Returns True if the minimim percent of Bandwidth Lines was reached and False otherwise. Update the header with the progress. """ - min_perc_reached_before = state.get('min_perc_reached') + min_perc_reached_before = state.get("min_perc_reached") if number_consensus_relays is not None: statsd, success = self.measured_progress_stats( - num_bw_lines, number_consensus_relays, min_perc_reached_before) + num_bw_lines, number_consensus_relays, min_perc_reached_before + ) # add statistics about progress always header.add_stats(**statsd) if not success: # From sbws 1.1.0 the lines are reported (#29853) even if they # are less than the minimum percent. - state['min_perc_reached'] = None + state["min_perc_reached"] = None return False else: - state['min_perc_reached'] = now_isodt_str() + state["min_perc_reached"] = now_isodt_str() return True
def bw_line_for_node_id(self, node_id): @@ -1468,7 +1620,7 @@ class V3BWFile(object): return bwl[0] return None
- def to_plt(self, attrs=['bw'], sorted_by=None): + def to_plt(self, attrs=["bw"], sorted_by=None): """Return bandwidth data in a format useful for matplotlib.
Used from external tool to plot. @@ -1478,17 +1630,17 @@ class V3BWFile(object): return x, ys, attrs
def write(self, output): - if output == '/dev/stdout': + if output == "/dev/stdout": log.info("Writing to stdout is not supported.") return - log.info('Writing v3bw file to %s', output) + log.info("Writing v3bw file to %s", output) # To avoid inconsistent reads, the bandwidth data is written to an # archive path, then atomically symlinked to 'latest.v3bw' out_dir = os.path.dirname(output) - out_link = os.path.join(out_dir, 'latest.v3bw') - out_link_tmp = out_link + '.tmp' + out_link = os.path.join(out_dir, "latest.v3bw") + out_link_tmp = out_link + ".tmp" with DirectoryLock(out_dir): - with open(output, 'wt') as fd: + with open(output, "wt") as fd: fd.write(str(self.header)) for line in self.bw_lines: fd.write(str(line)) @@ -1496,10 +1648,15 @@ class V3BWFile(object): # To atomically symlink a file, we need to create a temporary link, # then rename it to the final link name. (POSIX guarantees that # rename is atomic.) - log.debug('Creating symlink {} -> {}.' - .format(out_link_tmp, output_basename)) + log.debug( + "Creating symlink {} -> {}.".format( + out_link_tmp, output_basename + ) + ) os.symlink(output_basename, out_link_tmp) - log.debug('Renaming symlink {} -> {} to {} -> {}.' - .format(out_link_tmp, output_basename, - out_link, output_basename)) + log.debug( + "Renaming symlink {} -> {} to {} -> {}.".format( + out_link_tmp, output_basename, out_link, output_basename + ) + ) os.rename(out_link_tmp, out_link) diff --git a/sbws/sbws.py b/sbws/sbws.py index 1ac13dd..7639ba5 100644 --- a/sbws/sbws.py +++ b/sbws/sbws.py @@ -20,25 +20,30 @@ log = logging.getLogger(__name__)
def _ensure_dirs(conf): - log.debug('Ensuring all dirs exists.') + log.debug("Ensuring all dirs exists.") # it is not needed to check sbws_home dir, since the following # will create parent dirs too (in case they don't exist) - os.makedirs(conf.getpath('paths', 'datadir'), exist_ok=True) - os.makedirs(conf.getpath('paths', 'v3bw_dname'), exist_ok=True) - os.makedirs(conf.getpath('paths', 'log_dname'), exist_ok=True) + os.makedirs(conf.getpath("paths", "datadir"), exist_ok=True) + os.makedirs(conf.getpath("paths", "v3bw_dname"), exist_ok=True) + os.makedirs(conf.getpath("paths", "log_dname"), exist_ok=True)
def _adjust_log_level(args, conf): if not args.log_level: return - conf['logger_sbws']['level'] = args.log_level + conf["logger_sbws"]["level"] = args.log_level
def _get_startup_line(): py_ver = platform.python_version() py_plat = platform.platform() - return 'sbws %s with python %s on %s, stem %s, and requests %s' % \ - (version, py_ver, py_plat, stem_version, requests_version) + return "sbws %s with python %s on %s, stem %s, and requests %s" % ( + version, + py_ver, + py_plat, + stem_version, + requests_version, + )
def main(): @@ -57,14 +62,22 @@ def main(): def_args = [args, conf] def_kwargs = {} known_commands = { - 'cleanup': {'f': sbws.core.cleanup.main, - 'a': def_args, 'kw': def_kwargs}, - 'scanner': {'f': sbws.core.scanner.main, - 'a': def_args, 'kw': def_kwargs}, - 'generate': {'f': sbws.core.generate.main, - 'a': def_args, 'kw': def_kwargs}, - 'stats': {'f': sbws.core.stats.main, - 'a': def_args, 'kw': def_kwargs}, + "cleanup": { + "f": sbws.core.cleanup.main, + "a": def_args, + "kw": def_kwargs, + }, + "scanner": { + "f": sbws.core.scanner.main, + "a": def_args, + "kw": def_kwargs, + }, + "generate": { + "f": sbws.core.generate.main, + "a": def_args, + "kw": def_kwargs, + }, + "stats": {"f": sbws.core.stats.main, "a": def_args, "kw": def_kwargs}, } try: if args.command not in known_commands: @@ -72,6 +85,6 @@ def main(): else: log.info(_get_startup_line()) comm = known_commands[args.command] - exit(comm['f'](*comm['a'], **comm['kw'])) + exit(comm["f"](*comm["a"], **comm["kw"])) except KeyboardInterrupt: - print('') + print("") diff --git a/sbws/util/config.py b/sbws/util/config.py index 5ba3fd5..8d18b12 100644 --- a/sbws/util/config.py +++ b/sbws/util/config.py @@ -1,6 +1,6 @@ """Util functions to manage sbws configuration files."""
-from configparser import (ConfigParser, ExtendedInterpolation) +from configparser import ConfigParser, ExtendedInterpolation from configparser import InterpolationMissingOptionError import os import logging @@ -8,21 +8,25 @@ import logging.config from urllib.parse import urlparse from string import Template from tempfile import NamedTemporaryFile -from sbws.globals import (DEFAULT_CONFIG_PATH, DEFAULT_LOG_CONFIG_PATH, - USER_CONFIG_PATH, SUPERVISED_RUN_DPATH, - SUPERVISED_USER_CONFIG_PATH) +from sbws.globals import ( + DEFAULT_CONFIG_PATH, + DEFAULT_LOG_CONFIG_PATH, + USER_CONFIG_PATH, + SUPERVISED_RUN_DPATH, + SUPERVISED_USER_CONFIG_PATH, +)
from sbws.util.iso3166 import ISO_3166_ALPHA_2
-_ALPHANUM = 'abcdefghijklmnopqrstuvwxyz' +_ALPHANUM = "abcdefghijklmnopqrstuvwxyz" _ALPHANUM += _ALPHANUM.upper() -_ALPHANUM += '0123456789' +_ALPHANUM += "0123456789"
-_SYMBOLS_NO_QUOTES = '!@#$%^&*()-_=+\|[]{}:;/?.,<>' +_SYMBOLS_NO_QUOTES = "!@#$%^&*()-_=+\|[]{}:;/?.,<>"
-_HEX = '0123456789ABCDEF' +_HEX = "0123456789ABCDEF"
-_LOG_LEVELS = ['debug', 'info', 'warning', 'error', 'critical'] +_LOG_LEVELS = ["debug", "info", "warning", "error", "critical"]
log = logging.getLogger(__name__)
@@ -37,16 +41,18 @@ def _expand_path(path):
def _extend_config(conf, fname): """Extend ConfigParser from file configuration.""" - log.debug('Reading config file %s', fname) - with open(fname, 'rt') as fd: + log.debug("Reading config file %s", fname) + with open(fname, "rt") as fd: conf.read_file(fd, source=fname) return conf
def _get_default_config(): """Return ConfigParser with default configuration.""" - conf = ConfigParser(interpolation=ExtendedInterpolation(), - converters={'path': _expand_path}) + conf = ConfigParser( + interpolation=ExtendedInterpolation(), + converters={"path": _expand_path}, + ) return _extend_config(conf, DEFAULT_CONFIG_PATH)
@@ -62,32 +68,38 @@ def _get_user_config(args, conf=None): argument and extend the configuration if they are found. """ if not conf: - conf = ConfigParser(interpolation=ExtendedInterpolation(), - converters={'path': _expand_path}) + conf = ConfigParser( + interpolation=ExtendedInterpolation(), + converters={"path": _expand_path}, + ) else: assert isinstance(conf, ConfigParser) if args.config: if not os.path.isfile(args.config): # XXX: The logger is not configured at this stage, # sbws should start with a logger before reading configurations. - print('Configuration file %s not found, using defaults.' % - args.config) + print( + "Configuration file %s not found, using defaults." + % args.config + ) return conf - print('Using configuration provided as argument %s' % args.config) + print("Using configuration provided as argument %s" % args.config) return _extend_config(conf, args.config) user_config_path = _obtain_user_conf_path() if os.path.isfile(user_config_path): - print('Using configuration file %s' % user_config_path) + print("Using configuration file %s" % user_config_path) return _extend_config(conf, user_config_path) - log.debug('No user config found, using defaults.') + log.debug("No user config found, using defaults.") return conf
def _get_default_logging_config(conf=None): """Get default logging configuration.""" if not conf: - conf = ConfigParser(interpolation=ExtendedInterpolation(), - converters={'path': _expand_path}) + conf = ConfigParser( + interpolation=ExtendedInterpolation(), + converters={"path": _expand_path}, + ) else: assert isinstance(conf, ConfigParser) return _extend_config(conf, DEFAULT_LOG_CONFIG_PATH) @@ -102,7 +114,7 @@ def get_config(args):
def _can_log_to_file(conf): - ''' + """ Checks all the known reasons for why we might not be able to log to a file, and returns whether or not we think we will be able to do so. This is useful because if we can't log to a file, we might want to force logging to @@ -110,34 +122,34 @@ def _can_log_to_file(conf):
If we can't log to file, return False and the reason. Otherwise return True and an empty string. - ''' + """ # We won't be able to get paths.log_dname from the config when we are first # initializing sbws because it depends on paths.sbws_home (by default). # If there is an issue getting this option, tell the caller that we can't # log to file. try: - conf.getpath('paths', 'log_dname') + conf.getpath("paths", "log_dname") except InterpolationMissingOptionError as e: return False, e - return True, '' + return True, ""
def configure_logging(args, conf): assert isinstance(conf, ConfigParser) - logger = 'logger_sbws' + logger = "logger_sbws" # Set the correct handler(s) based on [logging] options handlers = set() can_log_to_file, reason = _can_log_to_file(conf) - if not can_log_to_file or conf.getboolean('logging', 'to_stdout'): + if not can_log_to_file or conf.getboolean("logging", "to_stdout"): # always add to_stdout if we cannot log to file - handlers.add('to_stdout') - if can_log_to_file and conf.getboolean('logging', 'to_file'): - handlers.add('to_file') - if conf.getboolean('logging', 'to_syslog'): - handlers.add('to_syslog') + handlers.add("to_stdout") + if can_log_to_file and conf.getboolean("logging", "to_file"): + handlers.add("to_file") + if conf.getboolean("logging", "to_syslog"): + handlers.add("to_syslog") # Collect the handlers in the appropriate config option - conf[logger]['handlers'] = ','.join(handlers) - if 'to_file' in handlers: + conf[logger]["handlers"] = ",".join(handlers) + if "to_file" in handlers: # This is weird. # # Python's logging library expects 'args' to be a tuple ... but it has @@ -146,28 +158,31 @@ def configure_logging(args, conf): # The first argument is the file name to which it should log. Set it to # the sbws command (like 'scanner' or 'generate') if possible, or to # 'sbws' failing that. - dname = conf.getpath('paths', 'log_dname') + dname = conf.getpath("paths", "log_dname") os.makedirs(dname, exist_ok=True) - fname = os.path.join(dname, '{}.log'.format(args.command or 'sbws')) + fname = os.path.join(dname, "{}.log".format(args.command or "sbws")) # The second argument is the file mode, and it should be left alone - mode = 'a' + mode = "a" # The third is the maximum file size (in bytes) each log file should be - max_bytes = conf.getint('logging', 'to_file_max_bytes') + max_bytes = conf.getint("logging", "to_file_max_bytes") # And the forth is the number of backups to keep - num_backups = conf.getint('logging', 'to_file_num_backups') + num_backups = conf.getint("logging", "to_file_num_backups") # Now store those things as a string in the config. So dumb. - conf['handler_to_file']['args'] = \ - str((fname, mode, max_bytes, num_backups)) + conf["handler_to_file"]["args"] = str( + (fname, mode, max_bytes, num_backups) + ) # Set some stuff that needs config parser's interpolation - conf['formatter_to_file']['format'] = conf['logging']['to_file_format'] - conf['formatter_to_stdout']['format'] = conf['logging']['to_stdout_format'] - conf['formatter_to_syslog']['format'] = conf['logging']['to_syslog_format'] - conf[logger]['level'] = conf['logging']['level'].upper() - conf['handler_to_file']['level'] = conf['logging']['to_file_level'].upper() - conf['handler_to_stdout']['level'] = \ - conf['logging']['to_stdout_level'].upper() - conf['handler_to_syslog']['level'] = \ - conf['logging']['to_syslog_level'].upper() + conf["formatter_to_file"]["format"] = conf["logging"]["to_file_format"] + conf["formatter_to_stdout"]["format"] = conf["logging"]["to_stdout_format"] + conf["formatter_to_syslog"]["format"] = conf["logging"]["to_syslog_format"] + conf[logger]["level"] = conf["logging"]["level"].upper() + conf["handler_to_file"]["level"] = conf["logging"]["to_file_level"].upper() + conf["handler_to_stdout"]["level"] = conf["logging"][ + "to_stdout_level" + ].upper() + conf["handler_to_syslog"]["level"] = conf["logging"][ + "to_syslog_level" + ].upper() # If there's a log_level cli argument, the user would expect that level # in the standard output. # conf['logging']['level'] sets the lower level, but it's still needed to @@ -175,19 +190,19 @@ def configure_logging(args, conf): # It also must be set up in the end, since cli arguments have higher # priority. if args.log_level: - conf['logging']['level'] = args.log_level.upper() - conf['handler_to_stdout']['level'] = conf['logging']['level'] + conf["logging"]["level"] = args.log_level.upper() + conf["handler_to_stdout"]["level"] = conf["logging"]["level"] # Now we configure the standard python logging system - with NamedTemporaryFile('w+t') as fd: + with NamedTemporaryFile("w+t") as fd: conf.write(fd) fd.seek(0, 0) logging.config.fileConfig(fd.name)
def validate_config(conf): - ''' Checks the given conf for bad values or bad combinations of values. If + """Checks the given conf for bad values or bad combinations of values. If there's something wrong, returns False and a list of error messages. - Otherwise, return True and an empty list ''' + Otherwise, return True and an empty list""" errors = [] errors.extend(_validate_general(conf)) errors.extend(_validate_cleanup(conf)) @@ -202,13 +217,13 @@ def validate_config(conf):
def _validate_cleanup(conf): errors = [] - sec = 'cleanup' - err_tmpl = Template('$sec/$key ($val): $e') + sec = "cleanup" + err_tmpl = Template("$sec/$key ($val): $e") ints = { - 'data_files_compress_after_days': {'minimum': 1, 'maximum': None}, - 'data_files_delete_after_days': {'minimum': 1, 'maximum': None}, - 'v3bw_files_compress_after_days': {'minimum': 1, 'maximum': None}, - 'v3bw_files_delete_after_days': {'minimum': 1, 'maximum': None}, + "data_files_compress_after_days": {"minimum": 1, "maximum": None}, + "data_files_delete_after_days": {"minimum": 1, "maximum": None}, + "v3bw_files_compress_after_days": {"minimum": 1, "maximum": None}, + "v3bw_files_delete_after_days": {"minimum": 1, "maximum": None}, } all_valid_keys = list(ints.keys()) errors.extend(_validate_section_keys(conf, sec, all_valid_keys, err_tmpl)) @@ -218,21 +233,22 @@ def _validate_cleanup(conf):
def _validate_general(conf): errors = [] - sec = 'general' - err_tmpl = Template('$sec/$key ($val): $e') + sec = "general" + err_tmpl = Template("$sec/$key ($val): $e") ints = { - 'data_period': {'minimum': 1, 'maximum': None}, - 'circuit_timeout': {'minimum': 1, 'maximum': None}, + "data_period": {"minimum": 1, "maximum": None}, + "circuit_timeout": {"minimum": 1, "maximum": None}, } floats = { - 'http_timeout': {'minimum': 0.0, 'maximum': None}, + "http_timeout": {"minimum": 0.0, "maximum": None}, } bools = { - 'reset_bw_ipv4_changes': {}, - 'reset_bw_ipv6_changes': {}, + "reset_bw_ipv4_changes": {}, + "reset_bw_ipv6_changes": {}, } - all_valid_keys = list(ints.keys()) + list(floats.keys()) + \ - list(bools.keys()) + all_valid_keys = ( + list(ints.keys()) + list(floats.keys()) + list(bools.keys()) + ) errors.extend(_validate_section_keys(conf, sec, all_valid_keys, err_tmpl)) errors.extend(_validate_section_ints(conf, sec, ints, err_tmpl)) errors.extend(_validate_section_floats(conf, sec, floats, err_tmpl)) @@ -241,90 +257,117 @@ def _validate_general(conf):
def _obtain_sbws_home(conf): - sbws_home = conf.getpath('paths', 'sbws_home') + sbws_home = conf.getpath("paths", "sbws_home") # No need for .sbws when this is the default home if sbws_home == "/var/lib/sbws/.sbws": - conf['paths']['sbws_home'] = os.path.dirname(sbws_home) + conf["paths"]["sbws_home"] = os.path.dirname(sbws_home)
def _obtain_run_dpath(conf): """Set runtime directory when sbws is run by a system service.""" - xdg = os.environ.get('XDG_RUNTIME_DIR') - if os.environ.get('SUPERVISED') == "1": - conf['tor']['run_dpath'] = SUPERVISED_RUN_DPATH + xdg = os.environ.get("XDG_RUNTIME_DIR") + if os.environ.get("SUPERVISED") == "1": + conf["tor"]["run_dpath"] = SUPERVISED_RUN_DPATH elif xdg is not None: - conf['tor']['run_dpath'] = os.path.join(xdg, 'sbws', 'tor') + conf["tor"]["run_dpath"] = os.path.join(xdg, "sbws", "tor")
def _validate_paths(conf): _obtain_sbws_home(conf) errors = [] - sec = 'paths' - err_tmpl = Template('$sec/$key ($val): $e') + sec = "paths" + err_tmpl = Template("$sec/$key ($val): $e") unvalidated_keys = [ - 'datadir', 'sbws_home', 'v3bw_fname', 'v3bw_dname', 'state_fname', - 'log_dname'] + "datadir", + "sbws_home", + "v3bw_fname", + "v3bw_dname", + "state_fname", + "log_dname", + ] all_valid_keys = unvalidated_keys - allow_missing = ['sbws_home'] - errors.extend(_validate_section_keys(conf, sec, all_valid_keys, err_tmpl, - allow_missing=allow_missing)) + allow_missing = ["sbws_home"] + errors.extend( + _validate_section_keys( + conf, sec, all_valid_keys, err_tmpl, allow_missing=allow_missing + ) + ) return errors
def _validate_country(conf, sec, key, err_tmpl): errors = [] if conf[sec].get(key, None) is None: - errors.append(err_tmpl.substitute( - sec=sec, key=key, val=None, - e="Missing country in configuration file.")) + errors.append( + err_tmpl.substitute( + sec=sec, + key=key, + val=None, + e="Missing country in configuration file.", + ) + ) return errors - valid = conf[sec]['country'] in ISO_3166_ALPHA_2 + valid = conf[sec]["country"] in ISO_3166_ALPHA_2 if not valid: - errors.append(err_tmpl.substitute( - sec=sec, key=key, val=conf[sec][key], - e="Not a valid ISO 3166 alpha-2 country code.")) + errors.append( + err_tmpl.substitute( + sec=sec, + key=key, + val=conf[sec][key], + e="Not a valid ISO 3166 alpha-2 country code.", + ) + ) return errors
def _validate_scanner(conf): errors = [] - sec = 'scanner' - err_tmpl = Template('$sec/$key ($val): $e') + sec = "scanner" + err_tmpl = Template("$sec/$key ($val): $e") ints = { - 'num_rtts': {'minimum': 0, 'maximum': 100}, - 'num_downloads': {'minimum': 1, 'maximum': 100}, - 'initial_read_request': {'minimum': 1, 'maximum': None}, - 'measurement_threads': {'minimum': 1, 'maximum': None}, - 'min_download_size': {'minimum': 1, 'maximum': None}, - 'max_download_size': {'minimum': 1, 'maximum': None}, + "num_rtts": {"minimum": 0, "maximum": 100}, + "num_downloads": {"minimum": 1, "maximum": 100}, + "initial_read_request": {"minimum": 1, "maximum": None}, + "measurement_threads": {"minimum": 1, "maximum": None}, + "min_download_size": {"minimum": 1, "maximum": None}, + "max_download_size": {"minimum": 1, "maximum": None}, } floats = { - 'download_toofast': {'minimum': 0.001, 'maximum': None}, - 'download_min': {'minimum': 0.001, 'maximum': None}, - 'download_target': {'minimum': 0.001, 'maximum': None}, - 'download_max': {'minimum': 0.001, 'maximum': None}, + "download_toofast": {"minimum": 0.001, "maximum": None}, + "download_min": {"minimum": 0.001, "maximum": None}, + "download_target": {"minimum": 0.001, "maximum": None}, + "download_max": {"minimum": 0.001, "maximum": None}, } - all_valid_keys = list(ints.keys()) + list(floats.keys()) + \ - ['nickname', 'country'] + all_valid_keys = ( + list(ints.keys()) + list(floats.keys()) + ["nickname", "country"] + ) errors.extend(_validate_section_keys(conf, sec, all_valid_keys, err_tmpl)) errors.extend(_validate_section_ints(conf, sec, ints, err_tmpl)) errors.extend(_validate_section_floats(conf, sec, floats, err_tmpl)) - valid, error_msg = _validate_nickname(conf[sec], 'nickname') + valid, error_msg = _validate_nickname(conf[sec], "nickname") if not valid: - errors.append(err_tmpl.substitute( - sec=sec, key='nickname', val=conf[sec]['nickname'], e=error_msg)) - errors.extend(_validate_country(conf, sec, 'country', err_tmpl)) + errors.append( + err_tmpl.substitute( + sec=sec, key="nickname", val=conf[sec]["nickname"], e=error_msg + ) + ) + errors.extend(_validate_country(conf, sec, "country", err_tmpl)) return errors
def _validate_tor(conf): _obtain_run_dpath(conf) errors = [] - sec = 'tor' - err_tmpl = Template('$sec/$key ($val): $e') + sec = "tor" + err_tmpl = Template("$sec/$key ($val): $e") unvalidated_keys = [ - 'datadir', 'run_dpath', 'control_socket', 'pid', 'log', - 'external_control_port', 'extra_lines', + "datadir", + "run_dpath", + "control_socket", + "pid", + "log", + "external_control_port", + "extra_lines", ] all_valid_keys = unvalidated_keys errors.extend(_validate_section_keys(conf, sec, all_valid_keys, err_tmpl)) @@ -333,19 +376,20 @@ def _validate_tor(conf):
def _validate_relayprioritizer(conf): errors = [] - sec = 'relayprioritizer' - err_tmpl = Template('$sec/$key ($val): $e') + sec = "relayprioritizer" + err_tmpl = Template("$sec/$key ($val): $e") ints = { - 'min_relays': {'minimum': 1, 'maximum': None}, + "min_relays": {"minimum": 1, "maximum": None}, } floats = { - 'fraction_relays': {'minimum': 0.0, 'maximum': 1.0}, + "fraction_relays": {"minimum": 0.0, "maximum": 1.0}, } bools = { - 'measure_authorities': {}, + "measure_authorities": {}, } - all_valid_keys = list(ints.keys()) + list(floats.keys()) +\ - list(bools.keys()) + all_valid_keys = ( + list(ints.keys()) + list(floats.keys()) + list(bools.keys()) + ) errors.extend(_validate_section_keys(conf, sec, all_valid_keys, err_tmpl)) errors.extend(_validate_section_ints(conf, sec, ints, err_tmpl)) errors.extend(_validate_section_floats(conf, sec, floats, err_tmpl)) @@ -355,27 +399,35 @@ def _validate_relayprioritizer(conf):
def _validate_logging(conf): errors = [] - sec = 'logging' - err_tmpl = Template('$sec/$key ($val): $e') + sec = "logging" + err_tmpl = Template("$sec/$key ($val): $e") enums = { - 'level': {'choices': _LOG_LEVELS}, - 'to_file_level': {'choices': _LOG_LEVELS}, - 'to_stdout_level': {'choices': _LOG_LEVELS}, - 'to_syslog_level': {'choices': _LOG_LEVELS}, + "level": {"choices": _LOG_LEVELS}, + "to_file_level": {"choices": _LOG_LEVELS}, + "to_stdout_level": {"choices": _LOG_LEVELS}, + "to_syslog_level": {"choices": _LOG_LEVELS}, } bools = { - 'to_file': {}, - 'to_stdout': {}, - 'to_syslog': {}, + "to_file": {}, + "to_stdout": {}, + "to_syslog": {}, } ints = { - 'to_file_max_bytes': {'minimum': 0, 'maximum': None}, - 'to_file_num_backups': {'minimum': 0, 'maximum': None}, + "to_file_max_bytes": {"minimum": 0, "maximum": None}, + "to_file_num_backups": {"minimum": 0, "maximum": None}, } - unvalidated = ['format', 'to_file_format', 'to_stdout_format', - 'to_syslog_format'] - all_valid_keys = list(bools.keys()) + list(enums.keys()) + \ - list(ints.keys()) + unvalidated + unvalidated = [ + "format", + "to_file_format", + "to_stdout_format", + "to_syslog_format", + ] + all_valid_keys = ( + list(bools.keys()) + + list(enums.keys()) + + list(ints.keys()) + + unvalidated + ) errors.extend(_validate_section_keys(conf, sec, all_valid_keys, err_tmpl)) errors.extend(_validate_section_bools(conf, sec, bools, err_tmpl)) errors.extend(_validate_section_enums(conf, sec, enums, err_tmpl)) @@ -384,42 +436,57 @@ def _validate_logging(conf):
def _validate_destinations(conf): errors = [] - sec = 'destinations' + sec = "destinations" section = conf[sec] - err_tmpl = Template('$sec/$key ($val): $e') + err_tmpl = Template("$sec/$key ($val): $e") dest_sections = [] for key in section.keys(): - if key == 'usability_test_interval': + if key == "usability_test_interval": value = section[key] valid, error_msg = _validate_int(section, key, minimum=1) if not valid: - errors.append(err_tmpl.substitute( - sec=sec, key=key, val=value, e=error_msg)) + errors.append( + err_tmpl.substitute( + sec=sec, key=key, val=value, e=error_msg + ) + ) continue value = section[key] valid, error_msg = _validate_boolean(section, key) if not valid: - errors.append(err_tmpl.substitute( - sec=sec, key=key, val=value, e=error_msg)) + errors.append( + err_tmpl.substitute(sec=sec, key=key, val=value, e=error_msg) + ) continue assert valid if section.getboolean(key): - dest_sections.append('{}.{}'.format(sec, key)) + dest_sections.append("{}.{}".format(sec, key)) urls = { - 'url': {}, + "url": {}, } - all_valid_keys = list(urls.keys()) \ - + ['verify', 'country', 'max_num_failures'] + all_valid_keys = list(urls.keys()) + [ + "verify", + "country", + "max_num_failures", + ] for sec in dest_sections: if sec not in conf: - errors.append('{} is an enabled destination but is not a ' - 'section in the config'.format(sec)) + errors.append( + "{} is an enabled destination but is not a " + "section in the config".format(sec) + ) continue - errors.extend(_validate_section_keys( - conf, sec, all_valid_keys, err_tmpl, - allow_missing=['verify', 'max_num_failures'])) + errors.extend( + _validate_section_keys( + conf, + sec, + all_valid_keys, + err_tmpl, + allow_missing=["verify", "max_num_failures"], + ) + ) errors.extend(_validate_section_urls(conf, sec, urls, err_tmpl)) - errors.extend(_validate_country(conf, sec, 'country', err_tmpl)) + errors.extend(_validate_country(conf, sec, "country", err_tmpl)) return errors
@@ -431,13 +498,19 @@ def _validate_section_keys(conf, sec, keys, tmpl, allow_missing=None): # Find keys that exist in the user's config that are not known for key in section: if key not in keys: - errors.append(tmpl.substitute( - sec=sec, key=key, val=section[key], e='Unknown key')) + errors.append( + tmpl.substitute( + sec=sec, key=key, val=section[key], e="Unknown key" + ) + ) # Find keys that don't exist in the user's config that should for key in keys: if key not in section and key not in allow_missing: - errors.append(tmpl.substitute( - sec=sec, key=key, val='[NOT SET]', e='Missing key')) + errors.append( + tmpl.substitute( + sec=sec, key=key, val="[NOT SET]", e="Missing key" + ) + ) return errors
@@ -446,11 +519,15 @@ def _validate_section_ints(conf, sec, ints, tmpl): section = conf[sec] for key in ints: valid, error = _validate_int( - section, key, minimum=ints[key]['minimum'], - maximum=ints[key]['maximum']) + section, + key, + minimum=ints[key]["minimum"], + maximum=ints[key]["maximum"], + ) if not valid: - errors.append(tmpl.substitute( - sec=sec, key=key, val=section[key], e=error)) + errors.append( + tmpl.substitute(sec=sec, key=key, val=section[key], e=error) + ) return errors
@@ -459,11 +536,15 @@ def _validate_section_floats(conf, sec, floats, tmpl): section = conf[sec] for key in floats: valid, error = _validate_float( - section, key, minimum=floats[key]['minimum'], - maximum=floats[key]['maximum']) + section, + key, + minimum=floats[key]["minimum"], + maximum=floats[key]["maximum"], + ) if not valid: - errors.append(tmpl.substitute( - sec=sec, key=key, val=section[key], e=error)) + errors.append( + tmpl.substitute(sec=sec, key=key, val=section[key], e=error) + ) return errors
@@ -473,8 +554,9 @@ def _validate_section_hosts(conf, sec, hosts, tmpl): for key in hosts: valid, error = _validate_host(section, key) if not valid: - errors.append(tmpl.substitute( - sec=sec, key=key, val=section[key], e=error)) + errors.append( + tmpl.substitute(sec=sec, key=key, val=section[key], e=error) + ) return errors
@@ -482,11 +564,16 @@ def _validate_section_ports(conf, sec, ports, tmpl): errors = [] section = conf[sec] for key in ports: - valid, error = _validate_int(section, key, minimum=1, maximum=2**16) + valid, error = _validate_int(section, key, minimum=1, maximum=2 ** 16) if not valid: - errors.append(tmpl.substitute( - sec=sec, key=key, val=section[key], - e='Not a valid port ({})'.format(error))) + errors.append( + tmpl.substitute( + sec=sec, + key=key, + val=section[key], + e="Not a valid port ({})".format(error), + ) + ) return errors
@@ -496,9 +583,14 @@ def _validate_section_bools(conf, sec, bools, tmpl): for key in bools: valid, error = _validate_boolean(section, key) if not valid: - errors.append(tmpl.substitute( - sec=sec, key=key, val=section[key], - e='Not a valid boolean string ({})'.format(error))) + errors.append( + tmpl.substitute( + sec=sec, + key=key, + val=section[key], + e="Not a valid boolean string ({})".format(error), + ) + ) return errors
@@ -508,9 +600,14 @@ def _validate_section_fingerprints(conf, sec, fps, tmpl): for key in fps: valid, error = _validate_fingerprint(section, key) if not valid: - errors.append(tmpl.substitute( - sec=sec, key=key, val=section[key], - e='Not a valid fingerprint ({})'.format(error))) + errors.append( + tmpl.substitute( + sec=sec, + key=key, + val=section[key], + e="Not a valid fingerprint ({})".format(error), + ) + ) return errors
@@ -520,9 +617,14 @@ def _validate_section_urls(conf, sec, urls, tmpl): for key in urls: valid, error = _validate_url(section, key) if not valid: - errors.append(tmpl.substitute( - sec=sec, key=key, val=section[key], - e='Not a valid url ({})'.format(error))) + errors.append( + tmpl.substitute( + sec=sec, + key=key, + val=section[key], + e="Not a valid url ({})".format(error), + ) + ) return errors
@@ -530,36 +632,44 @@ def _validate_section_enums(conf, sec, enums, tmpl): errors = [] section = conf[sec] for key in enums: - choices = enums[key]['choices'] + choices = enums[key]["choices"] valid, error = _validate_enum(section, key, choices) if not valid: - errors.append(tmpl.substitute( - sec=sec, key=key, val=section[key], - e='Not a valid enum choice ({})'.format(', '.join(choices)))) + errors.append( + tmpl.substitute( + sec=sec, + key=key, + val=section[key], + e="Not a valid enum choice ({})".format( + ", ".join(choices) + ), + ) + ) return errors
def _validate_enum(section, key, choices): value = section[key] if value not in choices: - return False, '{} not in allowed choices: {}'.format( - value, ', '.join(choices)) - return True, '' + return False, "{} not in allowed choices: {}".format( + value, ", ".join(choices) + ) + return True, ""
def _validate_url(section, key): value = section[key] url = urlparse(value) if not url.netloc: - return False, 'Does not appear to contain a hostname' + return False, "Does not appear to contain a hostname" # It should be possible to have an URL that starts by http:// that uses # TLS,but python requests is just checking the scheme starts by https # when verifying certificate: # https://github.com/requests/requests/blob/master/requests/adapters.py#L215 # noqa # When the scheme is https but the protocol is not TLS, requests will hang. - if url.scheme != 'https' and not url.netloc.startswith('127.0.0.1'): - return False, 'URL scheme must be HTTPS (except for the test server)' - return True, '' + if url.scheme != "https" and not url.netloc.startswith("127.0.0.1"): + return False, "URL scheme must be HTTPS (except for the test server)" + return True, ""
def _validate_int(section, key, minimum=None, maximum=None): @@ -570,12 +680,12 @@ def _validate_int(section, key, minimum=None, maximum=None): if minimum is not None: assert isinstance(minimum, int) if value < minimum: - return False, 'Cannot be less than {}'.format(minimum) + return False, "Cannot be less than {}".format(minimum) if maximum is not None: assert isinstance(maximum, int) if value > maximum: - return False, 'Cannot be greater than {}'.format(maximum) - return True, '' + return False, "Cannot be greater than {}".format(maximum) + return True, ""
def _validate_boolean(section, key): @@ -583,7 +693,7 @@ def _validate_boolean(section, key): section.getboolean(key) except ValueError as e: return False, e - return True, '' + return True, ""
def _validate_float(section, key, minimum=None, maximum=None): @@ -594,49 +704,57 @@ def _validate_float(section, key, minimum=None, maximum=None): if minimum is not None: assert isinstance(minimum, float) if value < minimum: - return False, 'Cannot be less than {}'.format(minimum) + return False, "Cannot be less than {}".format(minimum) if maximum is not None: assert isinstance(maximum, float) if value > maximum: - return False, 'Cannot be greater than {}'.format(maximum) - return True, '' + return False, "Cannot be greater than {}".format(maximum) + return True, ""
def _validate_host(section, key): # XXX: Implement this - return True, '' + return True, ""
def _validate_fingerprint(section, key): alphabet = _HEX length = 40 - return _validate_string(section, key, min_len=length, max_len=length, - alphabet=alphabet) + return _validate_string( + section, key, min_len=length, max_len=length, alphabet=alphabet + )
def _validate_nickname(section, key): alphabet = _ALPHANUM + _SYMBOLS_NO_QUOTES min_len = 1 max_len = 32 - return _validate_string(section, key, min_len=min_len, max_len=max_len, - alphabet=alphabet) + return _validate_string( + section, key, min_len=min_len, max_len=max_len, alphabet=alphabet + )
-def _validate_string(section, key, min_len=None, max_len=None, alphabet=None, - starts_with=None): +def _validate_string( + section, key, min_len=None, max_len=None, alphabet=None, starts_with=None +): s = section[key] if min_len is not None and len(s) < min_len: - return False, '{} is below minimum allowed length {}'.format( - len(s), min_len) + return False, "{} is below minimum allowed length {}".format( + len(s), min_len + ) if max_len is not None and len(s) > max_len: - return False, '{} is above maximum allowed length {}'.format( - len(s), max_len) + return False, "{} is above maximum allowed length {}".format( + len(s), max_len + ) if alphabet is not None: for i, c in enumerate(s): if c not in alphabet: - return False, 'Letter {} at position {} is not in allowed '\ - 'characters "{}"'.format(c, i, alphabet) + return ( + False, + "Letter {} at position {} is not in allowed " + 'characters "{}"'.format(c, i, alphabet), + ) if starts_with is not None: if not s.startswith(starts_with): - return False, '{} does not start with {}'.format(s, starts_with) - return True, '' + return False, "{} does not start with {}".format(s, starts_with) + return True, "" diff --git a/sbws/util/filelock.py b/sbws/util/filelock.py index 882c4e6..9a70ece 100644 --- a/sbws/util/filelock.py +++ b/sbws/util/filelock.py @@ -14,22 +14,25 @@ class _FLock: def __enter__(self): mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC self._fd = os.open(self._lock_fname, mode) - log.debug('Going to lock %s', self._lock_fname) + log.debug("Going to lock %s", self._lock_fname) try: fcntl.flock(self._fd, fcntl.LOCK_EX) except OSError as e: - fail_hard('We couldn't call flock. Are you on an unsupported ' - 'platform? Error: %s', e) - log.debug('Received lock %s', self._lock_fname) + fail_hard( + "We couldn't call flock. Are you on an unsupported " + "platform? Error: %s", + e, + ) + log.debug("Received lock %s", self._lock_fname)
def __exit__(self, exc_type, exc_val, exc_tb): if self._fd is not None: - log.debug('Releasing lock %s', self._lock_fname) + log.debug("Releasing lock %s", self._lock_fname) os.close(self._fd)
class DirectoryLock(_FLock): - ''' + """ Holds a lock on a file in **dname** so that other sbws processes/threads won't try to read/write while we are reading/writing in this directory.
@@ -40,15 +43,16 @@ class DirectoryLock(_FLock): Note: The directory must already exist.
:param str dname: Name of directory for which we want to obtain a lock - ''' + """ + def __init__(self, dname): assert os.path.isdir(dname) - lock_fname = os.path.join(dname, '.lockfile') + lock_fname = os.path.join(dname, ".lockfile") super().__init__(lock_fname)
class FileLock(_FLock): - ''' + """ Holds a lock on **fname** so that other sbws processes/threads won't try to read/write while we are reading/writing this file.
@@ -57,7 +61,8 @@ class FileLock(_FLock): >>> # no longer have the lock
:param str fname: Name of the file for which we want to obtain a lock - ''' + """ + def __init__(self, fname): - lock_fname = fname + '.lockfile' + lock_fname = fname + ".lockfile" super().__init__(lock_fname) diff --git a/sbws/util/fs.py b/sbws/util/fs.py index e5e7173..f079219 100644 --- a/sbws/util/fs.py +++ b/sbws/util/fs.py @@ -38,47 +38,56 @@ def sbws_required_disk_space(conf): # default crontab configuration will run cleanup once a day # default cleanup configuration will compress v3bw files after 1 day # and delete them after 7 days - v3bw_compress_after_days = conf.getint('cleanup', - 'v3bw_files_compress_after_days') - v3bw_delete_after_days = conf.getint('cleanup', - 'v3bw_files_delete_after_days') - v3bw_max_space_after_delete = \ - (space_v3bw_files_day * v3bw_compress_after_days) + \ - (size_compressed_files * num_v3bw_files_day * v3bw_delete_after_days) - text_dict['mb_bw'] = round(v3bw_max_space_after_delete / 1000 ** 2) - text_dict['d_bw'] = v3bw_delete_after_days + v3bw_compress_after_days = conf.getint( + "cleanup", "v3bw_files_compress_after_days" + ) + v3bw_delete_after_days = conf.getint( + "cleanup", "v3bw_files_delete_after_days" + ) + v3bw_max_space_after_delete = ( + space_v3bw_files_day * v3bw_compress_after_days + ) + (size_compressed_files * num_v3bw_files_day * v3bw_delete_after_days) + text_dict["mb_bw"] = round(v3bw_max_space_after_delete / 1000 ** 2) + text_dict["d_bw"] = v3bw_delete_after_days # default crontab configuration will run cleanup once a day # default cleanup configuration will compress v3bw files after 1 day # and delete them after 7 days - results_compress_after_days = conf.getint('cleanup', - 'data_files_compress_after_days') - results_delete_after_days = conf.getint('cleanup', - 'data_files_delete_after_days') - results_max_space_after_delete = \ - (space_result_files_day * results_compress_after_days) + \ - (size_compressed_files * num_v3bw_files_day * - results_delete_after_days) - text_dict['mb_results'] = round(results_max_space_after_delete / 1000 ** 2) - text_dict['d_r'] = results_delete_after_days + results_compress_after_days = conf.getint( + "cleanup", "data_files_compress_after_days" + ) + results_delete_after_days = conf.getint( + "cleanup", "data_files_delete_after_days" + ) + results_max_space_after_delete = ( + space_result_files_day * results_compress_after_days + ) + ( + size_compressed_files * num_v3bw_files_day * results_delete_after_days + ) + text_dict["mb_results"] = round(results_max_space_after_delete / 1000 ** 2) + text_dict["d_r"] = results_delete_after_days # not counted rotated files and assuming that when it is not rotated the # size will be aproximately 10MiB space_log_files = 0 - if conf.getboolean('logging', 'to_file'): - size_log_file = conf.getint('logging', 'to_file_max_bytes') - num_log_files = conf.getint('logging', 'to_file_num_backups') + if conf.getboolean("logging", "to_file"): + size_log_file = conf.getint("logging", "to_file_max_bytes") + num_log_files = conf.getint("logging", "to_file_num_backups") space_log_files = size_log_file * num_log_files - text_dict['mb_log'] = space_log_files + text_dict["mb_log"] = space_log_files # roughly, size of a current tor dir size_tor_dir = 19828000 - text_dict['mb_tor'] = round(size_tor_dir / 1000 ** 2) + text_dict["mb_tor"] = round(size_tor_dir / 1000 ** 2) # roughly, the size of this code and dependencies size_code_deps = 2097152 - text_dict['mb_code'] = round(size_code_deps / 1000 ** 2) + text_dict["mb_code"] = round(size_code_deps / 1000 ** 2) # Multiply per 2, just in case - size_total = (results_max_space_after_delete + - v3bw_max_space_after_delete + space_log_files + - size_tor_dir + size_code_deps) * 2 - text_dict['mb_total'] = round(size_total / 1000 ** 2) + size_total = ( + results_max_space_after_delete + + v3bw_max_space_after_delete + + space_log_files + + size_tor_dir + + size_code_deps + ) * 2 + text_dict["mb_total"] = round(size_total / 1000 ** 2) space_text = DISK_SPACE_TEXT.format(**text_dict) return space_text
@@ -97,11 +106,14 @@ def is_low_space(conf): what is needed for sbws and False otherwise needs. """ disk_required_mb = sbws_required_disk_space(conf) - disk_avail_mb = df(conf.getpath('paths', 'sbws_home')) + disk_avail_mb = df(conf.getpath("paths", "sbws_home")) if disk_avail_mb < disk_required_mb: - log.warn("The space left on the device (%s MiB) is less than " - "the minimum recommended to run sbws (%s MiB)." - "Run sbws cleanup to delete old sbws generated files.", - disk_avail_mb, disk_required_mb) + log.warn( + "The space left on the device (%s MiB) is less than " + "the minimum recommended to run sbws (%s MiB)." + "Run sbws cleanup to delete old sbws generated files.", + disk_avail_mb, + disk_required_mb, + ) return True return False diff --git a/sbws/util/iso3166.py b/sbws/util/iso3166.py index d537c18..7e23d02 100644 --- a/sbws/util/iso3166.py +++ b/sbws/util/iso3166.py @@ -13,25 +13,254 @@ when the destination Web Server is in a CDN. # It should be possible to obtain IP address location in that system too.
ISO_3166_ALPHA_2 = [ - 'AF', 'AX', 'AL', 'DZ', 'AS', 'AD', 'AO', 'AI', 'AQ', 'AG', 'AR', 'AM', - 'AW', 'AU', 'AT', 'AZ', 'BS', 'BH', 'BD', 'BB', 'BY', 'BE', 'BZ', 'BJ', - 'BM', 'BT', 'BO', 'BQ', 'BA', 'BW', 'BV', 'BR', 'IO', 'BN', 'BG', 'BF', - 'BI', 'CV', 'KH', 'CM', 'CA', 'KY', 'CF', 'TD', 'CL', 'CN', 'CX', 'CC', - 'CO', 'KM', 'CD', 'CG', 'CK', 'CR', 'CI', 'HR', 'CU', 'CW', 'CY', 'CZ', - 'DK', 'DJ', 'DM', 'DO', 'EC', 'EG', 'SV', 'GQ', 'ER', 'EE', 'SZ', 'ET', - 'FK', 'FO', 'FJ', 'FI', 'FR', 'GF', 'PF', 'TF', 'GA', 'GM', 'GE', 'DE', - 'GH', 'GI', 'GR', 'GL', 'GD', 'GP', 'GU', 'GT', 'GG', 'GN', 'GW', 'GY', - 'HT', 'HM', 'VA', 'HN', 'HK', 'HU', 'IS', 'IN', 'ID', 'IR', 'IQ', 'IE', - 'IM', 'IL', 'IT', 'JM', 'JP', 'JE', 'JO', 'KZ', 'KE', 'KI', 'KP', 'KR', - 'KW', 'KG', 'LA', 'LV', 'LB', 'LS', 'LR', 'LY', 'LI', 'LT', 'LU', 'MO', - 'MK', 'MG', 'MW', 'MY', 'MV', 'ML', 'MT', 'MH', 'MQ', 'MR', 'MU', 'YT', - 'MX', 'FM', 'MD', 'MC', 'MN', 'ME', 'MS', 'MA', 'MZ', 'MM', 'NA', 'NR', - 'NP', 'NL', 'NC', 'NZ', 'NI', 'NE', 'NG', 'NU', 'NF', 'MP', 'NO', 'OM', - 'PK', 'PW', 'PS', 'PA', 'PG', 'PY', 'PE', 'PH', 'PN', 'PL', 'PT', 'PR', - 'QA', 'RE', 'RO', 'RU', 'RW', 'BL', 'SH', 'KN', 'LC', 'MF', 'PM', 'VC', - 'WS', 'SM', 'ST', 'SA', 'SN', 'RS', 'SC', 'SL', 'SG', 'SX', 'SK', 'SI', - 'SB', 'SO', 'ZA', 'GS', 'SS', 'ES', 'LK', 'SD', 'SR', 'SJ', 'SE', 'CH', - 'SY', 'TW', 'TJ', 'TZ', 'TH', 'TL', 'TG', 'TK', 'TO', 'TT', 'TN', 'TR', - 'TM', 'TC', 'TV', 'UG', 'UA', 'AE', 'GB', 'UM', 'US', 'UY', 'UZ', 'VU', - 'VE', 'VN', 'VG', 'VI', 'WF', 'EH', 'YE', 'ZM', 'ZW', 'ZZ' - ] + "AF", + "AX", + "AL", + "DZ", + "AS", + "AD", + "AO", + "AI", + "AQ", + "AG", + "AR", + "AM", + "AW", + "AU", + "AT", + "AZ", + "BS", + "BH", + "BD", + "BB", + "BY", + "BE", + "BZ", + "BJ", + "BM", + "BT", + "BO", + "BQ", + "BA", + "BW", + "BV", + "BR", + "IO", + "BN", + "BG", + "BF", + "BI", + "CV", + "KH", + "CM", + "CA", + "KY", + "CF", + "TD", + "CL", + "CN", + "CX", + "CC", + "CO", + "KM", + "CD", + "CG", + "CK", + "CR", + "CI", + "HR", + "CU", + "CW", + "CY", + "CZ", + "DK", + "DJ", + "DM", + "DO", + "EC", + "EG", + "SV", + "GQ", + "ER", + "EE", + "SZ", + "ET", + "FK", + "FO", + "FJ", + "FI", + "FR", + "GF", + "PF", + "TF", + "GA", + "GM", + "GE", + "DE", + "GH", + "GI", + "GR", + "GL", + "GD", + "GP", + "GU", + "GT", + "GG", + "GN", + "GW", + "GY", + "HT", + "HM", + "VA", + "HN", + "HK", + "HU", + "IS", + "IN", + "ID", + "IR", + "IQ", + "IE", + "IM", + "IL", + "IT", + "JM", + "JP", + "JE", + "JO", + "KZ", + "KE", + "KI", + "KP", + "KR", + "KW", + "KG", + "LA", + "LV", + "LB", + "LS", + "LR", + "LY", + "LI", + "LT", + "LU", + "MO", + "MK", + "MG", + "MW", + "MY", + "MV", + "ML", + "MT", + "MH", + "MQ", + "MR", + "MU", + "YT", + "MX", + "FM", + "MD", + "MC", + "MN", + "ME", + "MS", + "MA", + "MZ", + "MM", + "NA", + "NR", + "NP", + "NL", + "NC", + "NZ", + "NI", + "NE", + "NG", + "NU", + "NF", + "MP", + "NO", + "OM", + "PK", + "PW", + "PS", + "PA", + "PG", + "PY", + "PE", + "PH", + "PN", + "PL", + "PT", + "PR", + "QA", + "RE", + "RO", + "RU", + "RW", + "BL", + "SH", + "KN", + "LC", + "MF", + "PM", + "VC", + "WS", + "SM", + "ST", + "SA", + "SN", + "RS", + "SC", + "SL", + "SG", + "SX", + "SK", + "SI", + "SB", + "SO", + "ZA", + "GS", + "SS", + "ES", + "LK", + "SD", + "SR", + "SJ", + "SE", + "CH", + "SY", + "TW", + "TJ", + "TZ", + "TH", + "TL", + "TG", + "TK", + "TO", + "TT", + "TN", + "TR", + "TM", + "TC", + "TV", + "UG", + "UA", + "AE", + "GB", + "UM", + "US", + "UY", + "UZ", + "VU", + "VE", + "VN", + "VG", + "VI", + "WF", + "EH", + "YE", + "ZM", + "ZW", + "ZZ", +] diff --git a/sbws/util/parser.py b/sbws/util/parser.py index 7ff39d2..6ded1f6 100644 --- a/sbws/util/parser.py +++ b/sbws/util/parser.py @@ -9,21 +9,25 @@ import os
def _default_dot_sbws_dname(): - home = os.path.expanduser('~') - return os.path.join(home, '.sbws') + home = os.path.expanduser("~") + return os.path.join(home, ".sbws")
def create_parser(): p = ArgumentParser(formatter_class=RawTextHelpFormatter) p.add_argument( - '--version', action='version', help='sbws version', - version='{}'.format(__version__)) - p.add_argument('--log-level', - choices=['debug', 'info', 'warning', 'error', 'critical'], - help='Override the sbws log level') - p.add_argument('-c', '--config', - help='Path to the sbws config file') - sub = p.add_subparsers(dest='command') + "--version", + action="version", + help="sbws version", + version="{}".format(__version__), + ) + p.add_argument( + "--log-level", + choices=["debug", "info", "warning", "error", "critical"], + help="Override the sbws log level", + ) + p.add_argument("-c", "--config", help="Path to the sbws config file") + sub = p.add_subparsers(dest="command") sbws.core.cleanup.gen_parser(sub) sbws.core.scanner.gen_parser(sub) sbws.core.generate.gen_parser(sub) diff --git a/sbws/util/requests.py b/sbws/util/requests.py index ede847d..442e60e 100644 --- a/sbws/util/requests.py +++ b/sbws/util/requests.py @@ -5,16 +5,17 @@ from sbws.util import stem as stem_utils
class TimedSession(requests.Session): - """Requests Session that sends timeout in the head and get methods. - """ + """Requests Session that sends timeout in the head and get methods."""
def get(self, url, **kwargs): - return super().get(url, timeout=getattr(self, "_timeout", None), - **kwargs) + return super().get( + url, timeout=getattr(self, "_timeout", None), **kwargs + )
def head(self, url, **kwargs): - return super().head(url, timeout=getattr(self, "_timeout", None), - **kwargs) + return super().head( + url, timeout=getattr(self, "_timeout", None), **kwargs + )
def make_session(controller, timeout): @@ -28,8 +29,8 @@ def make_session(controller, timeout): if socks_info is None: return None s.proxies = { - 'http': 'socks5h://{}:{}'.format(*socks_info), - 'https': 'socks5h://{}:{}'.format(*socks_info), + "http": "socks5h://{}:{}".format(*socks_info), + "https": "socks5h://{}:{}".format(*socks_info), } # ``_timeout`` is not used by request's Session, but it is by TimedSession. s._timeout = timeout diff --git a/sbws/util/state.py b/sbws/util/state.py index b6614a7..ce01f95 100644 --- a/sbws/util/state.py +++ b/sbws/util/state.py @@ -49,12 +49,12 @@ class State: if not os.path.exists(self._fname): return {} with FileLock(self._fname): - with open(self._fname, 'rt') as fd: + with open(self._fname, "rt") as fd: return json.load(fd, cls=CustomDecoder)
def _write(self): with FileLock(self._fname): - with open(self._fname, 'wt') as fd: + with open(self._fname, "wt") as fd: return json.dump(self._state, fd, indent=4, cls=CustomEncoder)
def __len__(self): diff --git a/sbws/util/stem.py b/sbws/util/stem.py index 638d50f..53260e2 100644 --- a/sbws/util/stem.py +++ b/sbws/util/stem.py @@ -1,9 +1,16 @@ import socks
-from stem.control import (Controller, Listener) -from stem import (SocketError, InvalidRequest, UnsatisfiableRequest, - OperationFailed, ControllerError, InvalidArguments, - ProtocolError, SocketClosed) +from stem.control import Controller, Listener +from stem import ( + SocketError, + InvalidRequest, + UnsatisfiableRequest, + OperationFailed, + ControllerError, + InvalidArguments, + ProtocolError, + SocketClosed, +) from stem.connection import IncorrectSocketType import stem.process from threading import RLock @@ -11,8 +18,15 @@ import copy import logging import os from sbws.globals import fail_hard -from sbws.globals import (TORRC_STARTING_POINT, TORRC_RUNTIME_OPTIONS, - TORRC_OPTIONS_CAN_FAIL, G, M, E, GE) +from sbws.globals import ( + TORRC_STARTING_POINT, + TORRC_RUNTIME_OPTIONS, + TORRC_OPTIONS_CAN_FAIL, + G, + M, + E, + GE, +) from sbws import settings
from stem import Flag @@ -22,23 +36,34 @@ stream_building_lock = RLock()
def attach_stream_to_circuit_listener(controller, circ_id): - ''' Returns a function that should be given to add_event_listener(). It - looks for newly created streams and attaches them to the given circ_id ''' + """Returns a function that should be given to add_event_listener(). It + looks for newly created streams and attaches them to the given circ_id"""
def closure_stream_event_listener(st): - if st.status == 'NEW' and st.purpose == 'USER': - log.debug('Attaching stream %s to circ %s %s', st.id, circ_id, - circuit_str(controller, circ_id)) + if st.status == "NEW" and st.purpose == "USER": + log.debug( + "Attaching stream %s to circ %s %s", + st.id, + circ_id, + circuit_str(controller, circ_id), + ) try: controller.attach_stream(st.id, circ_id) # So far we never saw this error. except ( - UnsatisfiableRequest, InvalidRequest, OperationFailed + UnsatisfiableRequest, + InvalidRequest, + OperationFailed, ) as e: - log.debug('Error attaching stream %s to circ %s: %s', - st.id, circ_id, e) + log.debug( + "Error attaching stream %s to circ %s: %s", + st.id, + circ_id, + e, + ) else: pass + return closure_stream_event_listener
@@ -64,7 +89,7 @@ def remove_event_listener(controller, func): def init_controller(conf): c = None # If the external control port is set, use it to initialize the controller. - control_port = conf['tor']['external_control_port'] + control_port = conf["tor"]["external_control_port"] if control_port: control_port = int(control_port) # If it can not connect, the program will exit here @@ -76,15 +101,15 @@ def init_controller(conf):
def is_bootstrapped(c): try: - line = c.get_info('status/bootstrap-phase') + line = c.get_info("status/bootstrap-phase") except (ControllerError, InvalidArguments, ProtocolError) as e: log.exception("Error trying to check bootstrap phase %s", e) return False state, _, progress, *_ = line.split() - progress = int(progress.split('=')[1]) - if state == 'NOTICE' and progress == 100: + progress = int(progress.split("=")[1]) + if state == "NOTICE" and progress == 100: return True - log.debug('Not bootstrapped. state={} progress={}'.format(state, progress)) + log.debug("Not bootstrapped. state={} progress={}".format(state, progress)) return False
@@ -128,7 +153,7 @@ def parse_user_torrc_config(torrc, torrc_text): NumCPUs 1 """ torrc_dict = torrc.copy() - for line in torrc_text.split('\n'): + for line in torrc_text.split("\n"): # Remove leading and trailing whitespace, if any line = line.strip() # Ignore blank lines @@ -158,8 +183,11 @@ def parse_user_torrc_config(torrc, torrc_text): assert isinstance(existing_val, list) existing_val.append(value) torrc_dict.update({key: existing_val}) - log.debug('Adding "%s %s" to torrc with which we are launching Tor', - key, value) + log.debug( + 'Adding "%s %s" to torrc with which we are launching Tor', + key, + value, + ) return torrc_dict
@@ -195,47 +223,52 @@ def set_torrc_options_can_fail(controller): try: controller.set_conf(k, v) except (InvalidArguments, InvalidRequest) as error: - log.debug('Ignoring option not supported by this Tor version. %s', - error) + log.debug( + "Ignoring option not supported by this Tor version. %s", error + ) except ControllerError as e: log.exception(e) exit(1)
def launch_tor(conf): - os.makedirs(conf.getpath('tor', 'datadir'), mode=0o700, exist_ok=True) - os.makedirs(conf.getpath('tor', 'log'), exist_ok=True) - os.makedirs(conf.getpath('tor', 'run_dpath'), mode=0o700, exist_ok=True) + os.makedirs(conf.getpath("tor", "datadir"), mode=0o700, exist_ok=True) + os.makedirs(conf.getpath("tor", "log"), exist_ok=True) + os.makedirs(conf.getpath("tor", "run_dpath"), mode=0o700, exist_ok=True) # Bare minimum things, more or less torrc = copy.deepcopy(TORRC_STARTING_POINT) # Very important and/or common settings that we don't know until runtime # The rest of the settings are in globals.py - torrc.update({ - 'DataDirectory': conf.getpath('tor', 'datadir'), - 'PidFile': conf.getpath('tor', 'pid'), - 'ControlSocket': conf.getpath('tor', 'control_socket'), - 'Log': [ - 'NOTICE file {}'.format(os.path.join(conf.getpath('tor', 'log'), - 'notice.log')), - ], - 'CircuitBuildTimeout': conf['general']['circuit_timeout'], - }) - - torrc = parse_user_torrc_config(torrc, conf['tor']['extra_lines']) + torrc.update( + { + "DataDirectory": conf.getpath("tor", "datadir"), + "PidFile": conf.getpath("tor", "pid"), + "ControlSocket": conf.getpath("tor", "control_socket"), + "Log": [ + "NOTICE file {}".format( + os.path.join(conf.getpath("tor", "log"), "notice.log") + ), + ], + "CircuitBuildTimeout": conf["general"]["circuit_timeout"], + } + ) + + torrc = parse_user_torrc_config(torrc, conf["tor"]["extra_lines"]) # Finally launch Tor try: # If there is already a tor process running with the same control # socket, this will exit here. stem.process.launch_tor_with_config( - torrc, init_msg_handler=log.debug, take_ownership=True) + torrc, init_msg_handler=log.debug, take_ownership=True + ) except Exception as e: - fail_hard('Error trying to launch tor: %s', e) + fail_hard("Error trying to launch tor: %s", e) log.info("Started own tor.") # And return a controller to it - cont = _init_controller_socket(conf.getpath('tor', 'control_socket')) + cont = _init_controller_socket(conf.getpath("tor", "control_socket")) # In the case it was not possible to connect to own tor socket. if not cont: - fail_hard('Could not connect to own tor control socket.') + fail_hard("Could not connect to own tor control socket.") return cont
@@ -250,13 +283,13 @@ def launch_or_connect_to_tor(conf): set_torrc_options_can_fail(cont) # Set runtime options set_torrc_runtime_options(cont) - log.info('Started or connected to Tor %s.', cont.get_version()) + log.info("Started or connected to Tor %s.", cont.get_version()) return cont
def get_socks_info(controller): - ''' Returns the first SocksPort Tor is configured to listen on, in the form - of an (address, port) tuple ''' + """Returns the first SocksPort Tor is configured to listen on, in the form + of an (address, port) tuple""" try: socks_ports = controller.get_listeners(Listener.SOCKS) return socks_ports[0] @@ -270,17 +303,17 @@ def get_socks_info(controller):
def only_relays_with_bandwidth(controller, relays, min_bw=None, max_bw=None): - ''' + """ Given a list of relays, only return those that optionally have above **min_bw** and optionally have below **max_bw**, inclusively. If neither min_bw nor max_bw are given, essentially just returns the input list of relays. - ''' + """ assert min_bw is None or min_bw >= 0 assert max_bw is None or max_bw >= 0 ret = [] for relay in relays: - assert hasattr(relay, 'consensus_bandwidth') + assert hasattr(relay, "consensus_bandwidth") if min_bw is not None and relay.consensus_bandwidth < min_bw: continue if max_bw is not None and relay.consensus_bandwidth > max_bw: @@ -295,16 +328,22 @@ def circuit_str(controller, circ_id): try: circ = controller.get_circuit(circ_id) except ValueError as e: - log.warning('Circuit %s no longer seems to exist so can't return ' - 'a valid circuit string for it: %s', circ_id, e) + log.warning( + "Circuit %s no longer seems to exist so can't return " + "a valid circuit string for it: %s", + circ_id, + e, + ) return None # exceptions raised when stopping the scanner except (ControllerError, SocketClosed, socks.GeneralProxyError) as e: log.debug(e) return None - return '[' +\ - ' -> '.join(['{} ({})'.format(n, fp[0:8]) for fp, n in circ.path]) +\ - ']' + return ( + "[" + + " -> ".join(["{} ({})".format(n, fp[0:8]) for fp, n in circ.path]) + + "]" + )
def is_torrc_starting_point_set(tor_controller): @@ -321,7 +360,9 @@ def is_torrc_starting_point_set(tor_controller): if v != value_set: log.exception( "Uncorrectly configured %s, should be %s, is %s", - k, v, value_set + k, + v, + value_set, ) bad_options = True if not bad_options: diff --git a/sbws/util/userquery.py b/sbws/util/userquery.py index 1bdeb22..4486668 100644 --- a/sbws/util/userquery.py +++ b/sbws/util/userquery.py @@ -1,6 +1,6 @@ # Based on https://stackoverflow.com/a/3041990 -def query_yes_no(question, default='yes'): - ''' +def query_yes_no(question, default="yes"): + """ Ask a yes/no question via input() and return the user's answer.
:param str question: Prompt given to the user. @@ -9,25 +9,26 @@ def query_yes_no(question, default='yes'): ``None`` (meaning an answer is required from the user). :returns: ``True`` if we ended up with a 'yes' answer, otherwise ``False``. - ''' - valid = {'yes': True, 'y': True, 'ye': True, 'no': False, 'n': False} + """ + valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} if default is None: - prompt = ' [y/n] ' - elif default == 'yes': - prompt = ' [Y/n] ' - elif default == 'no': - prompt = ' [y/N] ' + prompt = " [y/n] " + elif default == "yes": + prompt = " [Y/n] " + elif default == "no": + prompt = " [y/N] " else: raise ValueError('invalid default answer: "%s"' % default) prompt = question + prompt first_loop = True while True: choice = input(prompt).lower() - if default is not None and choice == '': + if default is not None and choice == "": return valid[default] elif choice in valid: return valid[choice] elif first_loop: - prompt = 'Please respond with "yes" or "no" (or y or n).\n' +\ - prompt + prompt = ( + 'Please respond with "yes" or "no" (or y or n).\n' + prompt + ) first_loop = False diff --git a/tests/conftest.py b/tests/conftest.py index 5f71b4a..fb3924b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,7 +13,7 @@ from sbws.lib import resultdump from sbws.util.parser import create_parser
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def parser(): return create_parser()
@@ -21,6 +21,7 @@ def parser(): @pytest.fixture() def datadir(request): """get, read, open test files from the tests relative "data" directory.""" + class D: def __init__(self, basepath): self.basepath = basepath @@ -38,6 +39,7 @@ def datadir(request): def readlines(self, name): with self.open(name, "r") as f: return f.readlines() + return D(request.fspath.dirpath("data"))
@@ -46,7 +48,10 @@ def root_data_path(): """Path to the data dir in the tests root, for both unit and integration tests. """ - return os.path.join(os.path.dirname(os.path.abspath(__file__)), "data",) + return os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "data", + )
@pytest.fixture(scope="session") @@ -119,14 +124,14 @@ def router_status(server_descriptor, router_statuses):
# Because of the function scoped `args` in `tests.unit.conftest`, this has to # be function scoped too. -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def relay_list(args, conf, controller): """Returns a RelayList containing the Relays in the controller""" with freeze_time("2020-02-29 10:00:00"): return relaylist.RelayList(args, conf, controller)
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def result_dump(args, conf): """Returns a ResultDump without Results""" # To stop the thread that would be waiting for new results diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 40309ef..37b4ea5 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -26,7 +26,7 @@ class _PseudoArguments(argparse.Namespace): setattr(self, key, kw[key])
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def tmpdir(tmpdir_factory, request): """Create a tmp dir for the tests""" base = str(hash(request.node.nodeid))[:3] @@ -34,7 +34,7 @@ def tmpdir(tmpdir_factory, request): return bn
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def sbwshome_empty(tmpdir): """Create sbws home inside of the test net tmp dir without initializing.""" home = "/tmp/.sbws" @@ -42,65 +42,70 @@ def sbwshome_empty(tmpdir): return home
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def sbwshome_dir(sbwshome_empty): """Create sbws home inside of the test net tmp dir without initializing.""" - os.makedirs(os.path.join(sbwshome_empty, 'datadir'), exist_ok=True) + os.makedirs(os.path.join(sbwshome_empty, "datadir"), exist_ok=True) return sbwshome_empty
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def test_config_path(tmpdir): """""" - config = tmpdir.join('.sbws.ini') + config = tmpdir.join(".sbws.ini") return config
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def args(sbwshome_empty, parser, test_config_path): """Args with sbws home in the tests tmp dir.""" - args = _PseudoArguments(config=test_config_path, output=sbwshome_empty, - scale=False, log_level='debug') + args = _PseudoArguments( + config=test_config_path, + output=sbwshome_empty, + scale=False, + log_level="debug", + ) return args
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def conf(sbwshome_dir): """Default configuration with sbws home in the tmp test dir.""" conf = _get_default_config() - conf['paths']['sbws_home'] = sbwshome_dir + conf["paths"]["sbws_home"] = sbwshome_dir conf["paths"]["state_fpath"] = os.path.join(sbwshome_dir, "state.dat") - conf['tor']['run_dpath'] = os.path.join(sbwshome_dir, 'tor', 'run') - conf['destinations']['foo'] = 'on' - conf['destinations.foo'] = {} + conf["tor"]["run_dpath"] = os.path.join(sbwshome_dir, "tor", "run") + conf["destinations"]["foo"] = "on" + conf["destinations.foo"] = {} # The test server is not using TLS. Ideally it should also support TLS # If the url would start with https but the request is not using TLS, # the request would hang. - conf['destinations.foo']['url'] = 'http://127.0.0.1:28888/sbws.bin' - conf['tor']['external_control_port'] = '8015' + conf["destinations.foo"]["url"] = "http://127.0.0.1:28888/sbws.bin" + conf["tor"]["external_control_port"] = "8015" return conf
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def persistent_launch_tor(conf): cont = launch_or_connect_to_tor(conf) return cont
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def rl(args, conf, persistent_launch_tor): return RelayList(args, conf, persistent_launch_tor)
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def cb(args, conf, persistent_launch_tor, rl): return CB(args, conf, persistent_launch_tor, rl)
-@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def dests(args, conf, persistent_launch_tor, cb, rl): - dests, error_msg = DestinationList.from_config(conf, cb, rl, - persistent_launch_tor) + dests, error_msg = DestinationList.from_config( + conf, cb, rl, persistent_launch_tor + ) assert dests, error_msg return dests
diff --git a/tests/integration/core/test_scanner.py b/tests/integration/core/test_scanner.py index a6879db..ca18ba0 100644 --- a/tests/integration/core/test_scanner.py +++ b/tests/integration/core/test_scanner.py @@ -6,31 +6,40 @@ import logging
def assert_within(value, target, radius): - ''' + """ Assert that **value** is within **radius** of **target**
If target is 10 and radius is 2, value can be anywhere between 8 and 12 inclusive - ''' - assert target - radius < value, 'Value is too small. {} is not within '\ - '{} of {}'.format(value, radius, target) - assert target + radius > value, 'Value is too big. {} is not within '\ - '{} of {}'.format(value, radius, target) + """ + assert ( + target - radius < value + ), "Value is too small. {} is not within " "{} of {}".format( + value, radius, target + ) + assert ( + target + radius > value + ), "Value is too big. {} is not within " "{} of {}".format( + value, radius, target + )
-@pytest.mark.skip(reason=("Disabled because chutney is not creating a network" - "with relay1mbyteMAB.")) +@pytest.mark.skip( + reason=( + "Disabled because chutney is not creating a network" + "with relay1mbyteMAB." + ) +) def test_measure_relay_with_maxadvertisedbandwidth( - persistent_launch_tor, sbwshome_dir, args, conf, - dests, cb, rl, caplog): + persistent_launch_tor, sbwshome_dir, args, conf, dests, cb, rl, caplog +): caplog.set_level(logging.DEBUG) # d = get_everything_to_measure(sbwshome, cont, args, conf) # rl = d['rl'] # dests = d['dests'] # cb = d['cb'] # 117A456C911114076BEB4E757AC48B16CC0CCC5F is relay1mbyteMAB - relay = [r for r in rl.relays - if r.nickname == 'relay1mbyteMAB'][0] + relay = [r for r in rl.relays if r.nickname == "relay1mbyteMAB"][0] # d['relay'] = relay result = measure_relay(args, conf, dests, cb, rl, relay) assert len(result) == 1 @@ -42,15 +51,15 @@ def test_measure_relay_with_maxadvertisedbandwidth( # This relay has MaxAdvertisedBandwidth set, but should not be limited # to just 1 Mbyte. Assume and assert that all downloads where at least # more than 10% faster than 1 MBps - assert dl['amount'] / dl['duration'] > one_mbyte * 1.1 + assert dl["amount"] / dl["duration"] > one_mbyte * 1.1 assert result.relay_average_bandwidth == one_mbyte
@pytest.mark.skip(reason="temporally disabled") def test_measure_relay_with_relaybandwidthrate( - persistent_launch_tor, args, conf, dests, cb, rl): - relay = [r for r in rl.relays - if r.nickname == 'relay1mbyteRBR'][0] + persistent_launch_tor, args, conf, dests, cb, rl +): + relay = [r for r in rl.relays if r.nickname == "relay1mbyteRBR"][0] result = measure_relay(args, conf, dests, cb, rl, relay) assert len(result) == 1 result = result[0] @@ -59,4 +68,4 @@ def test_measure_relay_with_relaybandwidthrate( allowed_error = 0.1 * one_mbyte # allow 10% error in either direction dls = result.downloads for dl in dls: - assert_within(dl['amount'] / dl['duration'], one_mbyte, allowed_error) + assert_within(dl["amount"] / dl["duration"], one_mbyte, allowed_error) diff --git a/tests/integration/lib/test_destination.py b/tests/integration/lib/test_destination.py index f23e266..a00e61e 100644 --- a/tests/integration/lib/test_destination.py +++ b/tests/integration/lib/test_destination.py @@ -1,30 +1,32 @@ """Integration tests for destination.py""" import sbws.util.requests as requests_utils -from sbws.lib.destination import (DestinationList, Destination, - connect_to_destination_over_circuit) +from sbws.lib.destination import ( + DestinationList, + Destination, + connect_to_destination_over_circuit, +)
def test_destination_list_no_usability_test_success( - conf, persistent_launch_tor, cb, rl - ): + conf, persistent_launch_tor, cb, rl +): # In a future refactor, if DestionationList is not initialized with the # controller, this test should be an unit test. destination_list, error_msg = DestinationList.from_config( conf, cb, rl, persistent_launch_tor - ) + ) # Because there's only 1 destination in conftest, random should return # the same one. - assert destination_list.next() == \ - destination_list._all_dests[0] + assert destination_list.next() == destination_list._all_dests[0]
-def test_connect_to_destination_over_circuit_success(persistent_launch_tor, - dests, cb, rl): +def test_connect_to_destination_over_circuit_success( + persistent_launch_tor, dests, cb, rl +): destination = dests.next() session = requests_utils.make_session(persistent_launch_tor, 10) # Choose a relay that is not an exit - relay = [r for r in rl.relays - if r.nickname == 'test005m'][0] + relay = [r for r in rl.relays if r.nickname == "test005m"][0] # Choose an exit, for this test it does not matter the bandwidth helper = rl.exits_not_bad_allowing_port(destination.port)[0] circuit_path = [relay.fingerprint, helper.fingerprint] @@ -32,19 +34,20 @@ def test_connect_to_destination_over_circuit_success(persistent_launch_tor, circuit_id, _ = cb.build_circuit(circuit_path) # Perform "usability test" is_usable, response = connect_to_destination_over_circuit( - destination, circuit_id, session, persistent_launch_tor, 1024) + destination, circuit_id, session, persistent_launch_tor, 1024 + ) assert is_usable is True - assert 'content_length' in response + assert "content_length" in response assert destination.is_functional()
-def test_connect_to_destination_over_circuit_fail(persistent_launch_tor, - dests, cb, rl): - bad_destination = Destination('https://example.example', 1024, False) +def test_connect_to_destination_over_circuit_fail( + persistent_launch_tor, dests, cb, rl +): + bad_destination = Destination("https://example.example", 1024, False) session = requests_utils.make_session(persistent_launch_tor, 10) # Choose a relay that is not an exit - relay = [r for r in rl.relays - if r.nickname == 'test005m'][0] + relay = [r for r in rl.relays if r.nickname == "test005m"][0] # Choose an exit, for this test it does not matter the bandwidth helper = rl.exits_not_bad_allowing_port(bad_destination.port)[0] circuit_path = [relay.fingerprint, helper.fingerprint] @@ -52,7 +55,8 @@ def test_connect_to_destination_over_circuit_fail(persistent_launch_tor, circuit_id, _ = cb.build_circuit(circuit_path) # Perform "usability test" is_usable, response = connect_to_destination_over_circuit( - bad_destination, circuit_id, session, persistent_launch_tor, 1024) + bad_destination, circuit_id, session, persistent_launch_tor, 1024 + ) assert is_usable is False
# because it is the first time it fails, failures aren't count @@ -60,20 +64,21 @@ def test_connect_to_destination_over_circuit_fail(persistent_launch_tor,
# fail three times in a row is_usable, response = connect_to_destination_over_circuit( - bad_destination, circuit_id, session, persistent_launch_tor, 1024) + bad_destination, circuit_id, session, persistent_launch_tor, 1024 + ) is_usable, response = connect_to_destination_over_circuit( - bad_destination, circuit_id, session, persistent_launch_tor, 1024) + bad_destination, circuit_id, session, persistent_launch_tor, 1024 + ) assert not bad_destination.is_functional()
def test_functional_destinations(conf, cb, rl, persistent_launch_tor): - good_destination = Destination('https://127.0.0.1:28888', 1024, False) - bad_destination = Destination('https://example.example', 1024, False) + good_destination = Destination("https://127.0.0.1:28888", 1024, False) + bad_destination = Destination("https://example.example", 1024, False)
session = requests_utils.make_session(persistent_launch_tor, 10) # Choose a relay that is not an exit - relay = [r for r in rl.relays - if r.nickname == 'test005m'][0] + relay = [r for r in rl.relays if r.nickname == "test005m"][0] # Choose an exit, for this test it does not matter the bandwidth helper = rl.exits_not_bad_allowing_port(bad_destination.port)[0] circuit_path = [relay.fingerprint, helper.fingerprint] @@ -82,14 +87,21 @@ def test_functional_destinations(conf, cb, rl, persistent_launch_tor):
# fail three times in a row is_usable, response = connect_to_destination_over_circuit( - bad_destination, circuit_id, session, persistent_launch_tor, 1024) + bad_destination, circuit_id, session, persistent_launch_tor, 1024 + ) is_usable, response = connect_to_destination_over_circuit( - bad_destination, circuit_id, session, persistent_launch_tor, 1024) + bad_destination, circuit_id, session, persistent_launch_tor, 1024 + ) is_usable, response = connect_to_destination_over_circuit( - bad_destination, circuit_id, session, persistent_launch_tor, 1024) + bad_destination, circuit_id, session, persistent_launch_tor, 1024 + )
destination_list = DestinationList( - conf, [good_destination, bad_destination], cb, rl, - persistent_launch_tor) + conf, + [good_destination, bad_destination], + cb, + rl, + persistent_launch_tor, + ) functional_destinations = destination_list.functional_destinations assert [good_destination] == functional_destinations diff --git a/tests/integration/lib/test_relaylist.py b/tests/integration/lib/test_relaylist.py index 26195cc..8311393 100644 --- a/tests/integration/lib/test_relaylist.py +++ b/tests/integration/lib/test_relaylist.py @@ -1,16 +1,15 @@ - - def test_relay_properties(rl): - relay = [relay for relay in rl.relays if relay.nickname == 'test000a'][0] + relay = [relay for relay in rl.relays if relay.nickname == "test000a"][0] # The fingerprint and the master key can't be tested cause they are # created by chutney. - assert 'Authority' in relay.flags + assert "Authority" in relay.flags assert not relay.exit_policy or not relay.exit_policy.is_exiting_allowed() assert relay.average_bandwidth == 1073741824 assert relay.consensus_bandwidth == 0 - assert relay.address == '127.0.0.1' + assert relay.address == "127.0.0.1"
def test_relay_list_last_consensus_timestamp(rl): - assert rl.last_consensus_timestamp == \ - rl._relays[0].last_consensus_timestamp + assert ( + rl.last_consensus_timestamp == rl._relays[0].last_consensus_timestamp + ) diff --git a/tests/integration/lib/test_relayprioritizer.py b/tests/integration/lib/test_relayprioritizer.py index c9622f1..05a6b49 100644 --- a/tests/integration/lib/test_relayprioritizer.py +++ b/tests/integration/lib/test_relayprioritizer.py @@ -11,8 +11,7 @@ def static_time(value): yield value
-def _build_result_for_relay(conf, rl, result_type, relay_nick, - timestamp): +def _build_result_for_relay(conf, rl, result_type, relay_nick, timestamp): relay = [r for r in rl.relays if r.nickname == relay_nick] assert len(relay) == 1 relay = relay[0] @@ -20,34 +19,49 @@ def _build_result_for_relay(conf, rl, result_type, relay_nick, circ = [relay.fingerprint, other.fingerprint] rtts = [0.5, 0.5, 0.5] dls = [ - {'amount': 1024, 'duration': 1}, - {'amount': 1024, 'duration': 1}, - {'amount': 1024, 'duration': 1}, + {"amount": 1024, "duration": 1}, + {"amount": 1024, "duration": 1}, + {"amount": 1024, "duration": 1}, ] if result_type == ResultSuccess: - return ResultSuccess(rtts, dls, relay, circ, - conf['destinations.foo']['url'], - 'test', t=timestamp) + return ResultSuccess( + rtts, + dls, + relay, + circ, + conf["destinations.foo"]["url"], + "test", + t=timestamp, + )
elif result_type == ResultErrorCircuit: - return ResultErrorCircuit(relay, circ, - conf['destinations.foo']['url'], - 'test', msg='Test error circ message', - t=timestamp) + return ResultErrorCircuit( + relay, + circ, + conf["destinations.foo"]["url"], + "test", + msg="Test error circ message", + t=timestamp, + )
-@patch('time.time') -def test_relayprioritizer_general(time_mock, sbwshome_empty, args, - conf, rl, - persistent_launch_tor): +@patch("time.time") +def test_relayprioritizer_general( + time_mock, sbwshome_empty, args, conf, rl, persistent_launch_tor +): now = 1000000 time_mock.side_effect = static_time(now) rd = ResultDump(args, conf) try: rp = RelayPrioritizer(args, conf, rl, rd) results = [ - _build_result_for_relay(conf, rl, ResultSuccess, - 'test{:03d}m'.format(i), now - (i * 100)) + _build_result_for_relay( + conf, + rl, + ResultSuccess, + "test{:03d}m".format(i), + now - (i * 100), + ) # In chutney the relays are from 003 to 011 for i in range(3, 12) ] @@ -61,7 +75,7 @@ def test_relayprioritizer_general(time_mock, sbwshome_empty, args, # The measured relays will be in inverse order to their name. best_list_measured = best_list[6:] for i in range(3, 12): - nick = 'test{:03d}m'.format(i) + nick = "test{:03d}m".format(i) # -1 To start by the back, - 2 because their names start by 3, # not 1 pos = (i - 2) * -1 diff --git a/tests/integration/util/test_requests.py b/tests/integration/util/test_requests.py index 9c55640..9c126bc 100644 --- a/tests/integration/util/test_requests.py +++ b/tests/integration/util/test_requests.py @@ -8,11 +8,15 @@ from sbws.util import requests as requests_utils
def test_make_session(conf, persistent_launch_tor, dests): uuid_str = str(uuid.uuid4()) - settings.init_http_headers(conf.get('scanner', 'nickname'), uuid_str, - str(persistent_launch_tor.get_version())) + settings.init_http_headers( + conf.get("scanner", "nickname"), + uuid_str, + str(persistent_launch_tor.get_version()), + ) session = requests_utils.make_session( - persistent_launch_tor, conf.getfloat('general', 'http_timeout')) - assert session._timeout == conf.getfloat('general', 'http_timeout') + persistent_launch_tor, conf.getfloat("general", "http_timeout") + ) + assert session._timeout == conf.getfloat("general", "http_timeout")
# Because there is not an stream attached to a circuit, this will timeout. response = None diff --git a/tests/integration/util/test_stem.py b/tests/integration/util/test_stem.py index e5492d6..b4e6d47 100644 --- a/tests/integration/util/test_stem.py +++ b/tests/integration/util/test_stem.py @@ -8,14 +8,14 @@ def test_launch_and_okay(persistent_launch_tor):
def test_set_torrc_runtime_option_succesful(persistent_launch_tor): controller = persistent_launch_tor - runtime_options = controller.get_conf_map(['__LeaveStreamsUnattached']) - assert runtime_options == {'__LeaveStreamsUnattached': ['1']} + runtime_options = controller.get_conf_map(["__LeaveStreamsUnattached"]) + assert runtime_options == {"__LeaveStreamsUnattached": ["1"]}
def test_set_torrc_runtime_invalidrequest_option_fail(persistent_launch_tor): controller = persistent_launch_tor try: - controller.set_conf('ControlSocket', '/tmp/dummy') + controller.set_conf("ControlSocket", "/tmp/dummy") except stem_utils.InvalidRequest as e: assert "Unable to set option" in e.message
@@ -23,6 +23,6 @@ def test_set_torrc_runtime_invalidrequest_option_fail(persistent_launch_tor): def test_set_torrc_options_can_fail_option_fail(persistent_launch_tor): controller = persistent_launch_tor try: - controller.set_conf('BadOption', '0') + controller.set_conf("BadOption", "0") except stem_utils.InvalidArguments as e: assert "Unknown option" in e.message diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 49db357..893b045 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -5,61 +5,79 @@ from datetime import datetime import os
from sbws.globals import RESULT_VERSION -from sbws.lib.resultdump import (ResultErrorStream, ResultSuccess, Result) +from sbws.lib.resultdump import ResultErrorStream, ResultSuccess, Result from sbws.lib.resultdump import write_result_to_datadir from sbws.util.config import _get_default_config
TIME1 = 1529232277.9028733 TIME2 = datetime.utcnow().timestamp() -FP1 = 'A' * 40 -FP2 = 'B' * 40 -ED25519 = 'g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s' +FP1 = "A" * 40 +FP2 = "B" * 40 +ED25519 = "g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s" CIRC12 = [FP1, FP2] CIRC21 = [FP2, FP1] -DEST_URL = 'http://example.com/sbws.bin' -NICK1 = 'A' -NICK2 = 'B' -IP1 = '169.254.100.1' -IP2 = '169.254.100.2' +DEST_URL = "http://example.com/sbws.bin" +NICK1 = "A" +NICK2 = "B" +IP1 = "169.254.100.1" +IP2 = "169.254.100.2" RTTS1 = [ - 0.2943556308746338, 0.2885427474975586, 0.2802879810333252, - 0.28124427795410156, 0.2827129364013672, 0.2901294231414795, - 0.2784590721130371, 0.2838578224182129, 0.2842121124267578, - 0.28656768798828125 + 0.2943556308746338, + 0.2885427474975586, + 0.2802879810333252, + 0.28124427795410156, + 0.2827129364013672, + 0.2901294231414795, + 0.2784590721130371, + 0.2838578224182129, + 0.2842121124267578, + 0.28656768798828125, ] RTTS2 = [ - 0.9097037315368652, 0.9293286800384521, 2.3764255046844482, - 0.869133710861206, 0.8188621997833252, 0.9046516418457031, - 1.3477752208709717, 0.8118226528167725, 0.8821918964385986, - 0.8746812343597412 + 0.9097037315368652, + 0.9293286800384521, + 2.3764255046844482, + 0.869133710861206, + 0.8188621997833252, + 0.9046516418457031, + 1.3477752208709717, + 0.8118226528167725, + 0.8821918964385986, + 0.8746812343597412, ] RTTS3 = [ - 0.510988712310791, 0.4889242649078369, 0.5003941059112549, - 0.49333715438842773, 0.5000274181365967, 0.5426476001739502, - 0.5190870761871338, 0.4908745288848877, 0.5516133308410645, - 0.4995298385620117 + 0.510988712310791, + 0.4889242649078369, + 0.5003941059112549, + 0.49333715438842773, + 0.5000274181365967, + 0.5426476001739502, + 0.5190870761871338, + 0.4908745288848877, + 0.5516133308410645, + 0.4995298385620117, ] DOWNLOADS1 = [ {"duration": 28.01000952720642, "amount": 25916542}, {"duration": 28.203476428985596, "amount": 25916542}, {"duration": 27.897520780563354, "amount": 25916542}, {"duration": 29.330559492111206, "amount": 25916542}, - {"duration": 27.93175745010376, "amount": 25916542} + {"duration": 27.93175745010376, "amount": 25916542}, ] DOWNLOADS2 = [ {"duration": 23.68175435066223, "amount": 81920}, {"duration": 27.667736768722534, "amount": 81920}, {"duration": 31.022956371307373, "amount": 81920}, {"duration": 33.020694971084595, "amount": 81920}, - {"duration": 33.59471535682678, "amount": 81920} + {"duration": 33.59471535682678, "amount": 81920}, ] DOWNLOADS3 = [ {"duration": 30.008347988128662, "amount": 644411}, {"duration": 30.73241639137268, "amount": 644411}, {"duration": 31.845987796783447, "amount": 644411}, {"duration": 29.703084230422974, "amount": 644411}, - {"duration": 30.438726663589478, "amount": 644411} + {"duration": 30.438726663589478, "amount": 644411}, ] SCANNER = "test" AVG_BW = 966080 @@ -70,22 +88,32 @@ UNMEASURED = False
now = datetime.utcnow()
-RELAY1 = Result.Relay(FP1, NICK1, IP1, ED25519, - average_bandwidth=AVG_BW, burst_bandwidth=BUR_BW, - observed_bandwidth=OBS_BW, consensus_bandwidth=BW, - consensus_bandwidth_is_unmeasured=UNMEASURED, - relay_in_recent_consensus=[now], - relay_recent_measurement_attempt=[now], - relay_recent_priority_list=[now]) +RELAY1 = Result.Relay( + FP1, + NICK1, + IP1, + ED25519, + average_bandwidth=AVG_BW, + burst_bandwidth=BUR_BW, + observed_bandwidth=OBS_BW, + consensus_bandwidth=BW, + consensus_bandwidth_is_unmeasured=UNMEASURED, + relay_in_recent_consensus=[now], + relay_recent_measurement_attempt=[now], + relay_recent_priority_list=[now], +) RELAY2 = Result.Relay(FP2, NICK2, IP2, ED25519)
RESULT = Result(RELAY1, CIRC12, DEST_URL, SCANNER, t=TIME1) -RESULT_SUCCESS1 = ResultSuccess(RTTS1, DOWNLOADS1, RELAY1, CIRC12, DEST_URL, - SCANNER, t=TIME1) -RESULT_SUCCESS2 = ResultSuccess(RTTS2, DOWNLOADS2, RELAY2, CIRC21, DEST_URL, - SCANNER, t=TIME2) -RESULT_ERROR_STREAM = ResultErrorStream(RELAY1, CIRC12, DEST_URL, SCANNER, - t=TIME1, msg="Something bad") +RESULT_SUCCESS1 = ResultSuccess( + RTTS1, DOWNLOADS1, RELAY1, CIRC12, DEST_URL, SCANNER, t=TIME1 +) +RESULT_SUCCESS2 = ResultSuccess( + RTTS2, DOWNLOADS2, RELAY2, CIRC21, DEST_URL, SCANNER, t=TIME2 +) +RESULT_ERROR_STREAM = ResultErrorStream( + RELAY1, CIRC12, DEST_URL, SCANNER, t=TIME1, msg="Something bad" +)
RESULTDICT_IP_CHANGED = {FP1: [RESULT_SUCCESS1, RESULT_SUCCESS2]} RESULTDICT_IP_NOT_CHANGED = {FP1: [RESULT_SUCCESS1, RESULT_SUCCESS1]} @@ -117,17 +145,21 @@ BASE_RESULT_DICT = RELAY_DICT.copy() BASE_RESULT_DICT.update(BASE_RESULT_NO_RELAY_DICT)
RESULT_ERROR_STREAM_DICT = BASE_RESULT_DICT.copy() -RESULT_ERROR_STREAM_DICT.update({ - "type": "error-stream", - "msg": "Something bad", -}) +RESULT_ERROR_STREAM_DICT.update( + { + "type": "error-stream", + "msg": "Something bad", + } +)
RESULT_SUCCESS_DICT = BASE_RESULT_DICT.copy() -RESULT_SUCCESS_DICT.update({ - "rtts": RTTS1, - "type": "success", - "downloads": DOWNLOADS1, -}) +RESULT_SUCCESS_DICT.update( + { + "rtts": RTTS1, + "type": "success", + "downloads": DOWNLOADS1, + } +)
class _PseudoArguments(argparse.Namespace): @@ -146,7 +178,7 @@ class _PseudoArguments(argparse.Namespace): setattr(self, key, kw[key])
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def tmpdir(tmpdir_factory, request): """Create a tmp dir for the tests""" base = str(hash(request.node.nodeid))[:3] @@ -154,53 +186,57 @@ def tmpdir(tmpdir_factory, request): return bn
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def sbwshome_empty(tmpdir): """Create sbws home inside of the tests tmp dir without initializing.""" - home = tmpdir.join('.sbws') + home = tmpdir.join(".sbws") os.makedirs(home.strpath, exist_ok=True) return home.strpath
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def sbwshome_only_datadir(sbwshome_empty): """Create sbws home inside of the tests tmp dir with only datadir.""" - os.makedirs(os.path.join(sbwshome_empty, 'datadir'), exist_ok=True) + os.makedirs(os.path.join(sbwshome_empty, "datadir"), exist_ok=True) return sbwshome_empty
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def test_config_path(tmpdir): """""" - config = tmpdir.join('.sbws.ini') + config = tmpdir.join(".sbws.ini") return config
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def args(sbwshome_empty, parser, test_config_path): """Args with sbws home in the tests tmp dir.""" - args = _PseudoArguments(config=test_config_path, output=sbwshome_empty, - scale=False, log_level='debug') + args = _PseudoArguments( + config=test_config_path, + output=sbwshome_empty, + scale=False, + log_level="debug", + ) return args
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def conf(sbwshome_empty, tmpdir): """Default configuration with sbws home in the tmp test dir.""" conf = _get_default_config() - conf['paths']['sbws_home'] = sbwshome_empty - conf['paths']['state_fpath'] = str(tmpdir.join('.sbws', 'state.dat')) + conf["paths"]["sbws_home"] = sbwshome_empty + conf["paths"]["state_fpath"] = str(tmpdir.join(".sbws", "state.dat")) return conf
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def conf_results(sbwshome_success_result_two_relays, conf): """Minimal configuration having a datadir
So that `ResultDump` does not raise AssertionError.
""" - conf['paths']['sbws_home'] = sbwshome_success_result_two_relays + conf["paths"]["sbws_home"] = sbwshome_success_result_two_relays return conf
@@ -244,44 +280,44 @@ def resultdict_ip_changed_trimmed(): return {FP1: [RESULT_SUCCESS2]}
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def sbwshome_error_result(sbwshome_only_datadir, conf): - ''' + """ Creates an ~/.sbws with a single fresh ResultError in it - ''' - dd = conf.getpath('paths', 'datadir') + """ + dd = conf.getpath("paths", "datadir") write_result_to_datadir(RESULT_ERROR_STREAM, dd) return sbwshome_only_datadir
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def sbwshome_success_result(sbwshome_only_datadir, conf): - ''' + """ Creates an ~/.sbws with a single fresh ResultSuccess in it - ''' - dd = conf.getpath('paths', 'datadir') + """ + dd = conf.getpath("paths", "datadir") write_result_to_datadir(RESULT_SUCCESS1, dd) return sbwshome_only_datadir
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def sbwshome_success_result_one_relay(sbwshome_only_datadir, conf): - ''' + """ Creates an ~/.sbws with a a couple of fresh ResultSuccess for one relay - ''' - dd = conf.getpath('paths', 'datadir') + """ + dd = conf.getpath("paths", "datadir") write_result_to_datadir(RESULT_SUCCESS1, dd) write_result_to_datadir(RESULT_SUCCESS1, dd) return sbwshome_only_datadir
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def sbwshome_success_result_two_relays(sbwshome_only_datadir, conf): - ''' + """ Creates an ~/.sbws with a a couple of fresh ResultSuccess for a couple or relays - ''' - dd = conf.getpath('paths', 'datadir') + """ + dd = conf.getpath("paths", "datadir") write_result_to_datadir(RESULT_SUCCESS1, dd) write_result_to_datadir(RESULT_SUCCESS1, dd) write_result_to_datadir(RESULT_SUCCESS2, dd) @@ -289,13 +325,15 @@ def sbwshome_success_result_two_relays(sbwshome_only_datadir, conf): return sbwshome_only_datadir
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def end_event(): import threading + return threading.Event()
-@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def rd(args, conf_results): from sbws.lib.resultdump import ResultDump + return ResultDump(args, conf_results) diff --git a/tests/unit/core/test_generate.py b/tests/unit/core/test_generate.py index aafe271..9c7a0d2 100644 --- a/tests/unit/core/test_generate.py +++ b/tests/unit/core/test_generate.py @@ -11,24 +11,24 @@ def test_gen_parser_arg_round_digs(): passed and round-digs is PROP276_ROUND_DIG by default.
""" - parent_parser = argparse.ArgumentParser(prog='sbws') - subparsers = parent_parser.add_subparsers(help='generate help') + parent_parser = argparse.ArgumentParser(prog="sbws") + subparsers = parent_parser.add_subparsers(help="generate help") parser_generate = gen_parser(subparsers) # Explicitely set empty arguments, otherwise pytest will use pytest # arguments args = parser_generate.parse_args([]) assert args.round_digs == PROP276_ROUND_DIG # torflow_round_digs is not in the Namespace - assert getattr(args, 'torflow_round_digs', None) is None + assert getattr(args, "torflow_round_digs", None) is None # but it can still be passed as an argument - args = parser_generate.parse_args(['--torflow-round-digs', - str(TORFLOW_ROUND_DIG)]) + args = parser_generate.parse_args( + ["--torflow-round-digs", str(TORFLOW_ROUND_DIG)] + ) # though the variable is named round_digs assert args.round_digs == TORFLOW_ROUND_DIG # or use the short version - args = parser_generate.parse_args(['-r', str(TORFLOW_ROUND_DIG)]) + args = parser_generate.parse_args(["-r", str(TORFLOW_ROUND_DIG)]) assert args.round_digs == TORFLOW_ROUND_DIG # or use round-digs explicitely - args = parser_generate.parse_args(['--round-digs', - str(PROP276_ROUND_DIG)]) + args = parser_generate.parse_args(["--round-digs", str(PROP276_ROUND_DIG)]) assert args.round_digs == PROP276_ROUND_DIG diff --git a/tests/unit/core/test_stats.py b/tests/unit/core/test_stats.py index fb29117..04d6f6c 100644 --- a/tests/unit/core/test_stats.py +++ b/tests/unit/core/test_stats.py @@ -7,45 +7,47 @@ import logging
def test_stats_initted(sbwshome_empty, args, conf, caplog): - ''' + """ An initialized but rather empty .sbws directory should fail about missing ~/.sbws/datadir - ''' + """ try: sbws.core.stats.main(args, conf) except SystemExit as e: assert e.code == 1 else: - assert None, 'Should have failed' - assert '{}/datadir does not exist'.format( - os.path.abspath(sbwshome_empty)) == caplog.records[-1].getMessage() + assert None, "Should have failed" + assert ( + "{}/datadir does not exist".format(os.path.abspath(sbwshome_empty)) + == caplog.records[-1].getMessage() + )
-def test_stats_stale_result(args, conf, caplog, - sbwshome_success_result): - ''' +def test_stats_stale_result(args, conf, caplog, sbwshome_success_result): + """ An initialized .sbws directory with no fresh results should say so and exit cleanly - ''' + """ caplog.set_level(logging.DEBUG) sbws.core.stats.main(args, conf) - assert 'No fresh results' == caplog.records[-1].getMessage() + assert "No fresh results" == caplog.records[-1].getMessage()
-@patch('time.time') -def test_stats_fresh_result(time_mock, sbwshome_error_result, args, conf, - capsys, caplog): - ''' +@patch("time.time") +def test_stats_fresh_result( + time_mock, sbwshome_error_result, args, conf, capsys, caplog +): + """ An initialized .sbws directory with a fresh error result should have some boring stats and exit cleanly - ''' + """ args.error_types = False start = 1529232278 time_mock.side_effect = monotonic_time(start=start) sbws.core.stats.main(args, conf) captured = capsys.readouterr() - lines = captured.out.strip().split('\n') - assert '1 relays have recent results' in lines[0] + lines = captured.out.strip().split("\n") + assert "1 relays have recent results" in lines[0] # FIXME # needed_output_lines = [ # '1 relays have recent results', @@ -64,20 +66,21 @@ def test_stats_fresh_result(time_mock, sbwshome_error_result, args, conf, # assert needed_line in lines
-@patch('time.time') -def test_stats_fresh_results(time_mock, sbwshome_success_result_two_relays, - args, conf, capsys, caplog): - ''' +@patch("time.time") +def test_stats_fresh_results( + time_mock, sbwshome_success_result_two_relays, args, conf, capsys, caplog +): + """ An initialized .sbws directory with a fresh error and fresh success should have some exciting stats and exit cleanly - ''' + """ caplog.set_level(logging.DEBUG) start = 1529232278 time_mock.side_effect = monotonic_time(start=start) sbws.core.stats.main(args, conf) captured = capsys.readouterr() - lines = captured.out.strip().split('\n') - assert '1 relays have recent results' in lines[0] + lines = captured.out.strip().split("\n") + assert "1 relays have recent results" in lines[0] # FIXME # needed_output_lines = [ # '1 relays have recent results', diff --git a/tests/unit/lib/test_destination.py b/tests/unit/lib/test_destination.py index b5a766f..99eae7a 100644 --- a/tests/unit/lib/test_destination.py +++ b/tests/unit/lib/test_destination.py @@ -16,7 +16,7 @@ def test_destination_is_functional(): (MAX_SECONDS_RETRY_DESTINATION / 2) + 2 )
- d = destination.Destination('unexistenturl', 0, False) + d = destination.Destination("unexistenturl", 0, False) assert d.is_functional()
# Fail 3 consecutive times diff --git a/tests/unit/lib/test_heartbeat.py b/tests/unit/lib/test_heartbeat.py index 8a2b8ea..1e8e876 100644 --- a/tests/unit/lib/test_heartbeat.py +++ b/tests/unit/lib/test_heartbeat.py @@ -10,12 +10,12 @@ from sbws.util.state import State def test_total_measured_percent(conf, caplog): state = State(conf["paths"]["state_fname"]) state["recent_priority_list"] = [1, 2, 3] - hbeat = heartbeat.Heartbeat(conf.getpath('paths', 'state_fname')) + hbeat = heartbeat.Heartbeat(conf.getpath("paths", "state_fname"))
- hbeat.register_consensus_fprs(['A', 'B', 'C']) + hbeat.register_consensus_fprs(["A", "B", "C"])
- hbeat.register_measured_fpr('A') - hbeat.register_measured_fpr('B') + hbeat.register_measured_fpr("A") + hbeat.register_measured_fpr("B")
caplog.set_level(logging.INFO)
diff --git a/tests/unit/lib/test_relaylist.py b/tests/unit/lib/test_relaylist.py index 399d897..0d1c6ee 100644 --- a/tests/unit/lib/test_relaylist.py +++ b/tests/unit/lib/test_relaylist.py @@ -21,7 +21,7 @@ def test_init_relays( Additionally, make sure the calculated min bw for the second hop for exit/non-exit relays is correct, too. """ - state = State(conf['paths']['state_fpath']) + state = State(conf["paths"]["state_fpath"]) # There is no need to mock datetime to update the consensus, since the # actual date will be always later. # But it's needed to have the correct list of timestamps both for RelayList @@ -81,7 +81,7 @@ def test_increment_recent_measurement_attempt(args, conf, controller): And instead it only counts the number of attempts in the last days. It also tests that the state file is updated correctly. """ - state = State(conf['paths']['state_fpath']) + state = State(conf["paths"]["state_fpath"]) with freeze_time("2020-02-29 10:00:00"): relay_list = RelayList(args, conf, controller=controller, state=state) # The initial count is 0 and the state does not have that key. diff --git a/tests/unit/lib/test_resultdump.py b/tests/unit/lib/test_resultdump.py index 5c1bd0d..f85e0d1 100644 --- a/tests/unit/lib/test_resultdump.py +++ b/tests/unit/lib/test_resultdump.py @@ -10,7 +10,7 @@ from sbws.lib.resultdump import ( ResultErrorStream, ResultSuccess, trim_results_ip_changed, - load_result_file + load_result_file, )
@@ -20,34 +20,43 @@ def test_trim_results_ip_changed_defaults(resultdict_ip_not_changed):
def test_trim_results_ip_changed_on_changed_ipv4_changed( - resultdict_ip_changed, resultdict_ip_changed_trimmed): - results_dict = trim_results_ip_changed(resultdict_ip_changed, - on_changed_ipv4=True) + resultdict_ip_changed, resultdict_ip_changed_trimmed +): + results_dict = trim_results_ip_changed( + resultdict_ip_changed, on_changed_ipv4=True + ) assert resultdict_ip_changed_trimmed == results_dict
def test_trim_results_ip_changed_on_changed_ipv4_no_changed( - resultdict_ip_not_changed): - results_dict = trim_results_ip_changed(resultdict_ip_not_changed, - on_changed_ipv4=True) + resultdict_ip_not_changed, +): + results_dict = trim_results_ip_changed( + resultdict_ip_not_changed, on_changed_ipv4=True + ) assert resultdict_ip_not_changed == results_dict
-def test_trim_results_ip_changed_on_changed_ipv6(caplog, - resultdict_ip_not_changed): - results_dict = trim_results_ip_changed(resultdict_ip_not_changed, - on_changed_ipv6=True) +def test_trim_results_ip_changed_on_changed_ipv6( + caplog, resultdict_ip_not_changed +): + results_dict = trim_results_ip_changed( + resultdict_ip_not_changed, on_changed_ipv6=True + ) assert resultdict_ip_not_changed == results_dict # There might be other logs from other threads. with caplog.at_level(logging.WARNING): - assert 'Reseting bandwidth results when IPv6 changes, ' \ - 'is not yet implemented.\n' in caplog.text + assert ( + "Reseting bandwidth results when IPv6 changes, " + "is not yet implemented.\n" in caplog.text + )
def test_resultdump( rd, args, conf_results, controller, router_status, server_descriptor ): from sbws import settings + relay = Relay( router_status.fingerprint, controller, @@ -57,7 +66,12 @@ def test_resultdump( relay.increment_relay_recent_priority_list() relay.increment_relay_recent_measurement_attempt() r = ResultSuccess( - [], 2000, relay, ["A", "B"], "http://localhost/bw", "scanner_nick", + [], + 2000, + relay, + ["A", "B"], + "http://localhost/bw", + "scanner_nick", ) # Storing the result with `rd.queue.put` will not store the result to disk # because the thread is not spawned with pytest. @@ -69,7 +83,10 @@ def test_resultdump( assert 1 == len(results[0].relay_recent_priority_list) # Store a second result for the sme relay r = ResultError( - relay, ["A", "B"], "http://localhost/bw", "scanner_nick", + relay, + ["A", "B"], + "http://localhost/bw", + "scanner_nick", ) rd.store_result(r) assert 2 == len(results) diff --git a/tests/unit/lib/test_results.py b/tests/unit/lib/test_results.py index dccca09..d1a9146 100644 --- a/tests/unit/lib/test_results.py +++ b/tests/unit/lib/test_results.py @@ -13,41 +13,41 @@ from sbws.lib.relaylist import Relay
def test_Result(result): - ''' + """ A standard Result should not be convertible to a string because Result.type is not implemented. - ''' + """ try: str(result) print(str(result)) except NotImplementedError: pass else: - assert None, 'Should have failed' + assert None, "Should have failed"
def test_Result_from_dict_bad_version(): - ''' + """ The first thing that is checked is the version field, and a wrong one should return None - ''' - d = {'version': RESULT_VERSION + 1} + """ + d = {"version": RESULT_VERSION + 1} r = Result.from_dict(d) assert r is None
def test_Result_from_dict_bad_type(): - ''' + """ If the result type string doesn't match any of the known types, then it should throw NotImplementedError - ''' - d = {'version': RESULT_VERSION, 'type': 'NotARealType'} + """ + d = {"version": RESULT_VERSION, "type": "NotARealType"} try: Result.from_dict(d) except NotImplementedError as e: - assert str(e) == 'Unknown result type NotARealType' + assert str(e) == "Unknown result type NotARealType" else: - assert None, 'Should have failed' + assert None, "Should have failed"
def test_ResultSuccess_from_dict(result_success, result_success_dict): @@ -62,23 +62,22 @@ def test_ResultError_from_dict(result_error_stream, result_error_stream_dict): assert str(result_error_stream) == str(r2)
-@patch('time.time') +@patch("time.time") def test_ResultErrorCircuit(time_mock): t = 2000 time_mock.side_effect = monotonic_time(start=t) - fp1 = 'A' * 40 - fp2 = 'Z' * 40 - ed25519 = 'g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s' + fp1 = "A" * 40 + fp2 = "Z" * 40 + ed25519 = "g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s" circ = [fp1, fp2] - dest_url = 'http://example.com/sbws.bin' - scanner_nick = 'sbwsscanner' - nick = 'Mooooooo' - relay_ip = '169.254.100.1' + dest_url = "http://example.com/sbws.bin" + scanner_nick = "sbwsscanner" + nick = "Mooooooo" + relay_ip = "169.254.100.1" relay = Result.Relay(fp1, nick, relay_ip, ed25519) - msg = 'aaaaayyyyyy bb' + msg = "aaaaayyyyyy bb" r1 = ResultErrorCircuit(relay, circ, dest_url, scanner_nick, msg=msg) - r2 = ResultErrorCircuit(relay, circ, dest_url, scanner_nick, msg=msg, - t=t) + r2 = ResultErrorCircuit(relay, circ, dest_url, scanner_nick, msg=msg, t=t) assert r1.msg == msg assert r1.nickname == nick assert r1.time == t @@ -92,27 +91,33 @@ def test_ResultErrorCircuit(time_mock): assert str(r1) == str(r2)
-@patch('time.time') +@patch("time.time") def test_ResultErrorCircuit_from_dict(time_mock): t = 2000 time_mock.side_effect = monotonic_time(start=t) - fp1 = 'A' * 40 - fp2 = 'Z' * 40 - ed25519 = 'g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s' + fp1 = "A" * 40 + fp2 = "Z" * 40 + ed25519 = "g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s" circ = [fp1, fp2] - dest_url = 'http://example.com/sbws.bin' - scanner_nick = 'sbwsscanner' - nick = 'Mooooooo' - relay_ip = '169.254.100.1' + dest_url = "http://example.com/sbws.bin" + scanner_nick = "sbwsscanner" + nick = "Mooooooo" + relay_ip = "169.254.100.1" relay = Result.Relay(fp1, nick, relay_ip, ed25519) - msg = 'aaaaayyyyyy bb' + msg = "aaaaayyyyyy bb" r1 = ResultErrorCircuit(relay, circ, dest_url, scanner_nick, msg=msg) d = { - 'msg': msg, 'fingerprint': fp1, - 'nickname': nick, 'address': relay_ip, 'circ': circ, - 'dest_url': dest_url, 'scanner': scanner_nick, - 'version': RESULT_VERSION, 'type': _ResultType.ErrorCircuit, 'time': t, - 'master_key_ed25519': ed25519, + "msg": msg, + "fingerprint": fp1, + "nickname": nick, + "address": relay_ip, + "circ": circ, + "dest_url": dest_url, + "scanner": scanner_nick, + "version": RESULT_VERSION, + "type": _ResultType.ErrorCircuit, + "time": t, + "master_key_ed25519": ed25519, } r2 = Result.from_dict(d) assert isinstance(r1, ResultErrorCircuit) @@ -120,23 +125,22 @@ def test_ResultErrorCircuit_from_dict(time_mock): assert str(r1) == str(r2)
-@patch('time.time') +@patch("time.time") def test_ResultErrorStream(time_mock): t = 2000 time_mock.side_effect = monotonic_time(start=t) - fp1 = 'A' * 40 - fp2 = 'Z' * 40 - ed25519 = 'g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s' + fp1 = "A" * 40 + fp2 = "Z" * 40 + ed25519 = "g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s" circ = [fp1, fp2] - dest_url = 'http://example.com/sbws.bin' - scanner_nick = 'sbwsscanner' - nick = 'Mooooooo' - relay_ip = '169.254.100.1' + dest_url = "http://example.com/sbws.bin" + scanner_nick = "sbwsscanner" + nick = "Mooooooo" + relay_ip = "169.254.100.1" relay = Result.Relay(fp1, nick, relay_ip, ed25519) - msg = 'aaaaayyyyyy bb' + msg = "aaaaayyyyyy bb" r1 = ResultErrorStream(relay, circ, dest_url, scanner_nick, msg=msg) - r2 = ResultErrorStream(relay, circ, dest_url, scanner_nick, msg=msg, - t=t) + r2 = ResultErrorStream(relay, circ, dest_url, scanner_nick, msg=msg, t=t) assert r1.msg == msg assert r1.nickname == nick assert r1.time == t @@ -150,27 +154,33 @@ def test_ResultErrorStream(time_mock): assert str(r1) == str(r2)
-@patch('time.time') +@patch("time.time") def test_ResultErrorStream_from_dict(time_mock): t = 2000 time_mock.side_effect = monotonic_time(start=t) - fp1 = 'A' * 40 - fp2 = 'Z' * 40 - ed25519 = 'g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s' + fp1 = "A" * 40 + fp2 = "Z" * 40 + ed25519 = "g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s" circ = [fp1, fp2] - dest_url = 'http://example.com/sbws.bin' - scanner_nick = 'sbwsscanner' - nick = 'Mooooooo' - relay_ip = '169.254.100.1' + dest_url = "http://example.com/sbws.bin" + scanner_nick = "sbwsscanner" + nick = "Mooooooo" + relay_ip = "169.254.100.1" relay = Result.Relay(fp1, nick, relay_ip, ed25519) - msg = 'aaaaayyyyyy bb' + msg = "aaaaayyyyyy bb" r1 = ResultErrorStream(relay, circ, dest_url, scanner_nick, msg=msg) d = { - 'msg': msg, 'fingerprint': fp1, - 'nickname': nick, 'address': relay_ip, 'circ': circ, - 'dest_url': dest_url, 'scanner': scanner_nick, - 'version': RESULT_VERSION, 'type': _ResultType.ErrorStream, 'time': t, - 'master_key_ed25519': ed25519, + "msg": msg, + "fingerprint": fp1, + "nickname": nick, + "address": relay_ip, + "circ": circ, + "dest_url": dest_url, + "scanner": scanner_nick, + "version": RESULT_VERSION, + "type": _ResultType.ErrorStream, + "time": t, + "master_key_ed25519": ed25519, } r2 = Result.from_dict(d) assert isinstance(r1, ResultErrorStream) @@ -178,23 +188,22 @@ def test_ResultErrorStream_from_dict(time_mock): assert str(r1) == str(r2)
-@patch('time.time') +@patch("time.time") def test_ResultErrorAuth(time_mock): t = 2000 time_mock.side_effect = monotonic_time(start=t) - fp1 = 'A' * 40 - fp2 = 'Z' * 40 - ed25519 = 'g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s' + fp1 = "A" * 40 + fp2 = "Z" * 40 + ed25519 = "g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s" circ = [fp1, fp2] - dest_url = 'http://example.com/sbws.bin' - scanner_nick = 'sbwsscanner' - nick = 'Mooooooo' - relay_ip = '169.254.100.1' + dest_url = "http://example.com/sbws.bin" + scanner_nick = "sbwsscanner" + nick = "Mooooooo" + relay_ip = "169.254.100.1" relay = Result.Relay(fp1, nick, relay_ip, ed25519) - msg = 'aaaaayyyyyy bb' + msg = "aaaaayyyyyy bb" r1 = ResultErrorAuth(relay, circ, dest_url, scanner_nick, msg=msg) - r2 = ResultErrorAuth(relay, circ, dest_url, scanner_nick, msg=msg, - t=t) + r2 = ResultErrorAuth(relay, circ, dest_url, scanner_nick, msg=msg, t=t) assert r1.msg == msg assert r1.nickname == nick assert r1.time == t @@ -208,27 +217,33 @@ def test_ResultErrorAuth(time_mock): assert str(r1) == str(r2)
-@patch('time.time') +@patch("time.time") def test_ResultErrorAuth_from_dict(time_mock): t = 2000 time_mock.side_effect = monotonic_time(start=t) - fp1 = 'A' * 40 - fp2 = 'Z' * 40 - ed25519 = 'g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s' + fp1 = "A" * 40 + fp2 = "Z" * 40 + ed25519 = "g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s" circ = [fp1, fp2] - dest_url = 'http://example.com/sbws.bin' - scanner_nick = 'sbwsscanner' - nick = 'Mooooooo' - relay_ip = '169.254.100.1' + dest_url = "http://example.com/sbws.bin" + scanner_nick = "sbwsscanner" + nick = "Mooooooo" + relay_ip = "169.254.100.1" relay = Result.Relay(fp1, nick, relay_ip, ed25519) - msg = 'aaaaayyyyyy bb' + msg = "aaaaayyyyyy bb" r1 = ResultErrorAuth(relay, circ, dest_url, scanner_nick, msg=msg) d = { - 'msg': msg, 'fingerprint': fp1, - 'nickname': nick, 'address': relay_ip, 'circ': circ, - 'dest_url': dest_url, 'scanner': scanner_nick, - 'version': RESULT_VERSION, 'type': _ResultType.ErrorAuth, 'time': t, - 'master_key_ed25519': ed25519, + "msg": msg, + "fingerprint": fp1, + "nickname": nick, + "address": relay_ip, + "circ": circ, + "dest_url": dest_url, + "scanner": scanner_nick, + "version": RESULT_VERSION, + "type": _ResultType.ErrorAuth, + "time": t, + "master_key_ed25519": ed25519, } r2 = Result.from_dict(d) assert isinstance(r1, ResultErrorAuth) @@ -247,12 +262,22 @@ def test_relay_in_recent_consensus_count( ) # Initialize the ResultSuccess as `measure_relay` does r = ResultSuccess( - [], 2000, relay, ["A", "B"], "http://localhost/bw", "scanner_nick", + [], + 2000, + relay, + ["A", "B"], + "http://localhost/bw", + "scanner_nick", ) assert 1 == len(r.relay_in_recent_consensus) relay.update_relay_in_recent_consensus() r = ResultSuccess( - [], 2000, relay, ["A", "B"], "http://localhost/bw", "scanner_nick", + [], + 2000, + relay, + ["A", "B"], + "http://localhost/bw", + "scanner_nick", ) assert 2 == len(r.relay_in_recent_consensus)
@@ -270,7 +295,12 @@ def test_relay_recent_measurement_attempt_count( relay.increment_relay_recent_measurement_attempt() # Initialize the ResultSuccess as `measure_relay` does r = ResultSuccess( - [], 2000, relay, ["A", "B"], "http://localhost/bw", "scanner_nick", + [], + 2000, + relay, + ["A", "B"], + "http://localhost/bw", + "scanner_nick", ) assert 2 == len(r.relay_recent_measurement_attempt)
@@ -288,6 +318,11 @@ def test_relay_recent_priority_list_count( relay.increment_relay_recent_priority_list() # Initialize the ResultSuccess as `measure_relay` does r = ResultSuccess( - [], 2000, relay, ["A", "B"], "http://localhost/bw", "scanner_nick", + [], + 2000, + relay, + ["A", "B"], + "http://localhost/bw", + "scanner_nick", ) assert 2 == len(r.relay_recent_priority_list) diff --git a/tests/unit/lib/test_scaling.py b/tests/unit/lib/test_scaling.py index e55c2b0..baf195f 100644 --- a/tests/unit/lib/test_scaling.py +++ b/tests/unit/lib/test_scaling.py @@ -8,8 +8,11 @@ from sbws.lib.resultdump import load_result_file, ResultSuccess
def test_bw_filt(): bw_measurements = [ - 96700.00922329757, 70311.63051659254, 45531.743347556374, - 38913.97025485627, 55656.332364676025 + 96700.00922329757, + 70311.63051659254, + 45531.743347556374, + 38913.97025485627, + 55656.332364676025, ] fb = scaling.bw_filt(bw_measurements) # This is greater than the mean, that is 61422.73714139576 @@ -59,11 +62,11 @@ def test_bw_filt_from_results(root_data_path): bw_filts[fp] = (mu, muf) for fp, values in bw_filts.items(): assert bw_filts[fp][0] <= bw_filts[fp][1] - assert 5526756 == bw_filts['117A456C911114076BEB4E757AC48B16CC0CCC5F'][0] - assert 5643086 == bw_filts['117A456C911114076BEB4E757AC48B16CC0CCC5F'][1] - assert 5664965 == bw_filts['693F73187624BE760AAD2A12C5ED89DB1DE044F5'][0] - assert 5774274 == bw_filts['693F73187624BE760AAD2A12C5ED89DB1DE044F5'][1] - assert 5508279 == bw_filts['270A861ABED22EC2B625198BCCD7B2B9DBFFC93C'][0] - assert 5583737 == bw_filts['270A861ABED22EC2B625198BCCD7B2B9DBFFC93C'][1] - assert 5379911 == bw_filts['E894C65997F8EC96558B554176EEEA39C6A43EF6'][0] - assert 5485088 == bw_filts['E894C65997F8EC96558B554176EEEA39C6A43EF6'][1] + assert 5526756 == bw_filts["117A456C911114076BEB4E757AC48B16CC0CCC5F"][0] + assert 5643086 == bw_filts["117A456C911114076BEB4E757AC48B16CC0CCC5F"][1] + assert 5664965 == bw_filts["693F73187624BE760AAD2A12C5ED89DB1DE044F5"][0] + assert 5774274 == bw_filts["693F73187624BE760AAD2A12C5ED89DB1DE044F5"][1] + assert 5508279 == bw_filts["270A861ABED22EC2B625198BCCD7B2B9DBFFC93C"][0] + assert 5583737 == bw_filts["270A861ABED22EC2B625198BCCD7B2B9DBFFC93C"][1] + assert 5379911 == bw_filts["E894C65997F8EC96558B554176EEEA39C6A43EF6"][0] + assert 5485088 == bw_filts["E894C65997F8EC96558B554176EEEA39C6A43EF6"][1] diff --git a/tests/unit/lib/test_v3bwfile.py b/tests/unit/lib/test_v3bwfile.py index d1448d1..6fc3163 100644 --- a/tests/unit/lib/test_v3bwfile.py +++ b/tests/unit/lib/test_v3bwfile.py @@ -8,200 +8,241 @@ import pytest from unittest import mock
from sbws import __version__ as version -from sbws.globals import (SPEC_VERSION, SBWS_SCALING, TORFLOW_SCALING, - MIN_REPORT, TORFLOW_ROUND_DIG, PROP276_ROUND_DIG) +from sbws.globals import ( + SPEC_VERSION, + SBWS_SCALING, + TORFLOW_SCALING, + MIN_REPORT, + TORFLOW_ROUND_DIG, + PROP276_ROUND_DIG, +) from sbws.lib.resultdump import Result, load_result_file, ResultSuccess from sbws.lib.v3bwfile import ( - V3BWHeader, V3BWLine, TERMINATOR, LINE_SEP, - KEYVALUE_SEP_V1, num_results_of_type, - V3BWFile, round_sig_dig, - HEADER_RECENT_MEASUREMENTS_EXCLUDED_KEYS - ) + V3BWHeader, + V3BWLine, + TERMINATOR, + LINE_SEP, + KEYVALUE_SEP_V1, + num_results_of_type, + V3BWFile, + round_sig_dig, + HEADER_RECENT_MEASUREMENTS_EXCLUDED_KEYS, +) from sbws.util.state import CustomDecoder from sbws.util.timestamp import now_fname, now_isodt_str, now_unixts
timestamp = 1523974147 timestamp_l = str(timestamp) -version_l = KEYVALUE_SEP_V1.join(['version', SPEC_VERSION]) -scanner_country = 'US' -scanner_country_l = KEYVALUE_SEP_V1.join(['scanner_country', scanner_country]) -destinations_countries = '00,DE' -destinations_countries_l = KEYVALUE_SEP_V1.join(['destinations_countries', - destinations_countries]) -software_l = KEYVALUE_SEP_V1.join(['software', 'sbws']) -software_version_l = KEYVALUE_SEP_V1.join(['software_version', version]) -file_created = '2018-04-25T13:10:57' -file_created_l = KEYVALUE_SEP_V1.join(['file_created', file_created]) -latest_bandwidth = '2018-04-17T14:09:07' -latest_bandwidth_l = KEYVALUE_SEP_V1.join(['latest_bandwidth', - latest_bandwidth]) -attempts = '1' -attempts_l = KEYVALUE_SEP_V1.join(['recent_measurement_attempt_count', - attempts]) -failure = '0' -failure_l = KEYVALUE_SEP_V1.join(['recent_measurement_failure_count', - failure]) -header_ls = [timestamp_l, version_l, destinations_countries_l, file_created_l, - latest_bandwidth_l, - # attempts_l, failure_l, - scanner_country_l, software_l, software_version_l, TERMINATOR] +version_l = KEYVALUE_SEP_V1.join(["version", SPEC_VERSION]) +scanner_country = "US" +scanner_country_l = KEYVALUE_SEP_V1.join(["scanner_country", scanner_country]) +destinations_countries = "00,DE" +destinations_countries_l = KEYVALUE_SEP_V1.join( + ["destinations_countries", destinations_countries] +) +software_l = KEYVALUE_SEP_V1.join(["software", "sbws"]) +software_version_l = KEYVALUE_SEP_V1.join(["software_version", version]) +file_created = "2018-04-25T13:10:57" +file_created_l = KEYVALUE_SEP_V1.join(["file_created", file_created]) +latest_bandwidth = "2018-04-17T14:09:07" +latest_bandwidth_l = KEYVALUE_SEP_V1.join( + ["latest_bandwidth", latest_bandwidth] +) +attempts = "1" +attempts_l = KEYVALUE_SEP_V1.join( + ["recent_measurement_attempt_count", attempts] +) +failure = "0" +failure_l = KEYVALUE_SEP_V1.join(["recent_measurement_failure_count", failure]) +header_ls = [ + timestamp_l, + version_l, + destinations_countries_l, + file_created_l, + latest_bandwidth_l, + # attempts_l, failure_l, + scanner_country_l, + software_l, + software_version_l, + TERMINATOR, +] header_str = LINE_SEP.join(header_ls) + LINE_SEP -earliest_bandwidth = '2018-04-16T14:09:07' -earliest_bandwidth_l = KEYVALUE_SEP_V1.join(['earliest_bandwidth', - earliest_bandwidth]) -generator_started = '2018-04-16T14:09:05' -generator_started_l = KEYVALUE_SEP_V1.join(['generator_started', - generator_started]) -tor_version = '0.4.2.5' -tor_version_l = KEYVALUE_SEP_V1.join(['tor_version', tor_version]) - -header_extra_ls = [timestamp_l, version_l, - earliest_bandwidth_l, file_created_l, generator_started_l, - latest_bandwidth_l, - software_l, software_version_l, tor_version_l, - TERMINATOR] +earliest_bandwidth = "2018-04-16T14:09:07" +earliest_bandwidth_l = KEYVALUE_SEP_V1.join( + ["earliest_bandwidth", earliest_bandwidth] +) +generator_started = "2018-04-16T14:09:05" +generator_started_l = KEYVALUE_SEP_V1.join( + ["generator_started", generator_started] +) +tor_version = "0.4.2.5" +tor_version_l = KEYVALUE_SEP_V1.join(["tor_version", tor_version]) + +header_extra_ls = [ + timestamp_l, + version_l, + earliest_bandwidth_l, + file_created_l, + generator_started_l, + latest_bandwidth_l, + software_l, + software_version_l, + tor_version_l, + TERMINATOR, +] header_extra_str = LINE_SEP.join(header_extra_ls) + LINE_SEP
# Line produced without any scaling. # unmeasured and vote are not congruent with the exclusion, # but `from_data` is only used in the test and doesn't include the # arg `min_num` -raw_bwl_str = "bw=56 bw_mean=61423 bw_median=55656 "\ - "consensus_bandwidth=600000 consensus_bandwidth_is_unmeasured=False "\ - "desc_bw_avg=1000000000 desc_bw_bur=123456 desc_bw_obs_last=524288 "\ - "desc_bw_obs_mean=524288 error_circ=0 error_destination=0 error_misc=0 " \ - "error_second_relay=0 error_stream=2 " \ - "master_key_ed25519=g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s " \ - "nick=A " \ - "node_id=$AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA "\ - "relay_in_recent_consensus_count=3 "\ - "relay_recent_measurement_attempt_count=2 "\ - "relay_recent_measurements_excluded_error_count=2 "\ - "relay_recent_priority_list_count=3 "\ - "rtt=456 success=1 " \ +raw_bwl_str = ( + "bw=56 bw_mean=61423 bw_median=55656 " + "consensus_bandwidth=600000 consensus_bandwidth_is_unmeasured=False " + "desc_bw_avg=1000000000 desc_bw_bur=123456 desc_bw_obs_last=524288 " + "desc_bw_obs_mean=524288 error_circ=0 error_destination=0 error_misc=0 " + "error_second_relay=0 error_stream=2 " + "master_key_ed25519=g+Shk00y9Md0hg1S6ptnuc/wWKbADBgdjT0Kg+TSF3s " + "nick=A " + "node_id=$AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA " + "relay_in_recent_consensus_count=3 " + "relay_recent_measurement_attempt_count=2 " + "relay_recent_measurements_excluded_error_count=2 " + "relay_recent_priority_list_count=3 " + "rtt=456 success=1 " "time=2018-04-17T14:09:07\n" +)
v3bw_str = header_extra_str + raw_bwl_str
def test_v3bwheader_str(): """Test header str""" - header = V3BWHeader(timestamp_l, scanner_country=scanner_country, - destinations_countries=destinations_countries, - file_created=file_created) + header = V3BWHeader( + timestamp_l, + scanner_country=scanner_country, + destinations_countries=destinations_countries, + file_created=file_created, + ) assert header_str == str(header)
def test_v3bwheader_extra_str(): """Test header str with additional headers""" - header = V3BWHeader(timestamp_l, - file_created=file_created, - generator_started=generator_started, - earliest_bandwidth=earliest_bandwidth, - tor_version=tor_version) + header = V3BWHeader( + timestamp_l, + file_created=file_created, + generator_started=generator_started, + earliest_bandwidth=earliest_bandwidth, + tor_version=tor_version, + ) assert header_extra_str == str(header)
def test_v3bwheader_from_lines(): - header_obj = V3BWHeader(timestamp_l, - file_created=file_created, - generator_started=generator_started, - earliest_bandwidth=earliest_bandwidth, - tor_version=tor_version) + header_obj = V3BWHeader( + timestamp_l, + file_created=file_created, + generator_started=generator_started, + earliest_bandwidth=earliest_bandwidth, + tor_version=tor_version, + ) header, _ = V3BWHeader.from_lines_v1(header_extra_ls) assert str(header_obj) == str(header)
def test_v3bwheader_from_text(): - header_obj = V3BWHeader(timestamp_l, - file_created=file_created, - generator_started=generator_started, - earliest_bandwidth=earliest_bandwidth, - tor_version=tor_version) + header_obj = V3BWHeader( + timestamp_l, + file_created=file_created, + generator_started=generator_started, + earliest_bandwidth=earliest_bandwidth, + tor_version=tor_version, + ) header, _ = V3BWHeader.from_text_v1(header_extra_str) assert str(header_obj) == str(header)
def test_num_results_of_type(result_success, result_error_stream): - assert num_results_of_type([result_success], 'success') == 1 - assert num_results_of_type([result_error_stream], 'success') == 0 - assert num_results_of_type([result_success], 'error-stream') == 0 - assert num_results_of_type([result_error_stream], 'error-stream') == 1 + assert num_results_of_type([result_success], "success") == 1 + assert num_results_of_type([result_error_stream], "success") == 0 + assert num_results_of_type([result_success], "error-stream") == 0 + assert num_results_of_type([result_error_stream], "error-stream") == 1
def assert_round_sig_dig_any_digits(n, result): """Test that rounding n to any reasonable number of significant digits - produces result.""" - max_digits_int64 = int(math.ceil(math.log10(2**64 - 1))) + 1 + produces result.""" + max_digits_int64 = int(math.ceil(math.log10(2 ** 64 - 1))) + 1 for d in range(1, max_digits_int64 + 1): - assert(round_sig_dig(n, digits=d) == result) + assert round_sig_dig(n, digits=d) == result
def assert_round_sig_dig_any_digits_error(n, elp_fraction=0.5): """Test that rounding n to any reasonable number of significant digits - produces a result within elp_fraction * 10.0 ** -(digits - 1).""" - max_digits_int64 = int(math.ceil(math.log10(2**64 - 1))) + 1 + produces a result within elp_fraction * 10.0 ** -(digits - 1).""" + max_digits_int64 = int(math.ceil(math.log10(2 ** 64 - 1))) + 1 for d in range(1, max_digits_int64 + 1): error_fraction = elp_fraction * (10.0 ** -(d - 1)) # use ceil rather than round, to work around floating-point inaccuracy e = int(math.ceil(n * error_fraction)) - assert(round_sig_dig(n, digits=d) >= n - e) - assert(round_sig_dig(n, digits=d) <= n + e) + assert round_sig_dig(n, digits=d) >= n - e + assert round_sig_dig(n, digits=d) <= n + e
def test_round_sig_dig(): """Test rounding to a number of significant digits.""" # Expected values - assert(round_sig_dig(11, 1) == 10) - assert(round_sig_dig(11, 2) == 11) + assert round_sig_dig(11, 1) == 10 + assert round_sig_dig(11, 2) == 11
- assert(round_sig_dig(15, 1) == 20) - assert(round_sig_dig(15, 2) == 15) + assert round_sig_dig(15, 1) == 20 + assert round_sig_dig(15, 2) == 15
- assert(round_sig_dig(54, 1) == 50) - assert(round_sig_dig(54, 2) == 54) + assert round_sig_dig(54, 1) == 50 + assert round_sig_dig(54, 2) == 54
- assert(round_sig_dig(96, 1) == 100) - assert(round_sig_dig(96, 2) == 96) + assert round_sig_dig(96, 1) == 100 + assert round_sig_dig(96, 2) == 96
- assert(round_sig_dig(839, 1) == 800) - assert(round_sig_dig(839, 2) == 840) - assert(round_sig_dig(839, 3) == 839) + assert round_sig_dig(839, 1) == 800 + assert round_sig_dig(839, 2) == 840 + assert round_sig_dig(839, 3) == 839
- assert(round_sig_dig(5789, 1) == 6000) - assert(round_sig_dig(5789, 2) == 5800) - assert(round_sig_dig(5789, 3) == 5790) - assert(round_sig_dig(5789, 4) == 5789) + assert round_sig_dig(5789, 1) == 6000 + assert round_sig_dig(5789, 2) == 5800 + assert round_sig_dig(5789, 3) == 5790 + assert round_sig_dig(5789, 4) == 5789
- assert(round_sig_dig(24103, 1) == 20000) - assert(round_sig_dig(24103, 2) == 24000) - assert(round_sig_dig(24103, 3) == 24100) - assert(round_sig_dig(24103, 4) == 24100) - assert(round_sig_dig(24103, 5) == 24103) + assert round_sig_dig(24103, 1) == 20000 + assert round_sig_dig(24103, 2) == 24000 + assert round_sig_dig(24103, 3) == 24100 + assert round_sig_dig(24103, 4) == 24100 + assert round_sig_dig(24103, 5) == 24103
- assert(round_sig_dig(300000, 1) == 300000) + assert round_sig_dig(300000, 1) == 300000
# Floating-point values
# Must round based on fractions, must not double-round - assert(round_sig_dig(14, 1) == 10) - assert(round_sig_dig(14.0, 1) == 10) - assert(round_sig_dig(14.9, 1) == 10) - assert(round_sig_dig(15.0, 1) == 20) - assert(round_sig_dig(15.1, 1) == 20) - - assert(round_sig_dig(14, 2) == 14) - assert(round_sig_dig(14.0, 2) == 14) - assert(round_sig_dig(14.9, 2) == 15) - assert(round_sig_dig(15.0, 2) == 15) - assert(round_sig_dig(15.1, 2) == 15) + assert round_sig_dig(14, 1) == 10 + assert round_sig_dig(14.0, 1) == 10 + assert round_sig_dig(14.9, 1) == 10 + assert round_sig_dig(15.0, 1) == 20 + assert round_sig_dig(15.1, 1) == 20 + + assert round_sig_dig(14, 2) == 14 + assert round_sig_dig(14.0, 2) == 14 + assert round_sig_dig(14.9, 2) == 15 + assert round_sig_dig(15.0, 2) == 15 + assert round_sig_dig(15.1, 2) == 15
# Must round to integer - assert(round_sig_dig(14, 3) == 14) - assert(round_sig_dig(14.0, 3) == 14) - assert(round_sig_dig(14.9, 3) == 15) - assert(round_sig_dig(15.0, 3) == 15) - assert(round_sig_dig(15.1, 3) == 15) + assert round_sig_dig(14, 3) == 14 + assert round_sig_dig(14.0, 3) == 14 + assert round_sig_dig(14.9, 3) == 15 + assert round_sig_dig(15.0, 3) == 15 + assert round_sig_dig(15.1, 3) == 15
# Small integers assert_round_sig_dig_any_digits(0, 1) @@ -217,21 +258,21 @@ def test_round_sig_dig(): assert_round_sig_dig_any_digits(10, 10)
# Large values - assert_round_sig_dig_any_digits_error(2**30) - assert_round_sig_dig_any_digits_error(2**31) - assert_round_sig_dig_any_digits_error(2**32) + assert_round_sig_dig_any_digits_error(2 ** 30) + assert_round_sig_dig_any_digits_error(2 ** 31) + assert_round_sig_dig_any_digits_error(2 ** 32)
# the floating-point accuracy limit for this function is 2**73 # on some machines - assert_round_sig_dig_any_digits_error(2**62) - assert_round_sig_dig_any_digits_error(2**63) - assert_round_sig_dig_any_digits_error(2**64) + assert_round_sig_dig_any_digits_error(2 ** 62) + assert_round_sig_dig_any_digits_error(2 ** 63) + assert_round_sig_dig_any_digits_error(2 ** 64)
# Out of range values: must round to 1 assert_round_sig_dig_any_digits(-0.01, 1) assert_round_sig_dig_any_digits(-1, 1) assert_round_sig_dig_any_digits(-10.5, 1) - assert_round_sig_dig_any_digits(-(2**31), 1) + assert_round_sig_dig_any_digits(-(2 ** 31), 1)
# test the transition points in the supported range # testing the entire range up to 1 million takes 100s @@ -248,7 +289,7 @@ def test_round_sig_dig():
def test_v3bwline_from_results_file(datadir): - lines = datadir.readlines('results.txt') + lines = datadir.readlines("results.txt") d = dict() for line in lines: r = Result.from_dict(json.loads(line.strip(), cls=CustomDecoder)) @@ -264,12 +305,14 @@ def test_v3bwline_from_results_file(datadir):
def test_from_results_read(datadir, tmpdir, conf, args): results = load_result_file(str(datadir.join("results.txt"))) - expected_header = V3BWHeader(timestamp_l, - earliest_bandwidth=earliest_bandwidth, - latest_bandwidth=latest_bandwidth) + expected_header = V3BWHeader( + timestamp_l, + earliest_bandwidth=earliest_bandwidth, + latest_bandwidth=latest_bandwidth, + ) exclusion_dict = dict( [(k, 0) for k in HEADER_RECENT_MEASUREMENTS_EXCLUDED_KEYS] - ) + ) expected_header.add_relays_excluded_counters(exclusion_dict) raw_bwls = [V3BWLine.from_results(results[fp])[0] for fp in results] # Scale BWLines using torflow method, since it's the default and BWLines @@ -317,83 +360,99 @@ def num_consensus_relays(fpath):
# To do not have to create a consensus-cache file and set the path, # mock the result since it only returns the number of relays. -@mock.patch.object(V3BWFile, 'read_number_consensus_relays') +@mock.patch.object(V3BWFile, "read_number_consensus_relays") def test_torflow_scale(mock_consensus, datadir, tmpdir, conf): mock_consensus.return_value = 1 # state_fpath = str(tmpdir.join('.sbws', 'state.dat')) - state_fpath = conf['paths']['state_fpath'] + state_fpath = conf["paths"]["state_fpath"] results = load_result_file(str(datadir.join("results.txt"))) # Since v1.1.0, it'll write bw=1 if the minimum percent of measured relays # wasn't reached. Therefore mock the consensus number. # Because the consensus number is mocked, it'll try to read the sate path. # Obtain it from conf, so that the root directory exists. - v3bwfile = V3BWFile.from_results(results, '', '', - state_fpath, - scaling_method=TORFLOW_SCALING, - round_digs=TORFLOW_ROUND_DIG) + v3bwfile = V3BWFile.from_results( + results, + "", + "", + state_fpath, + scaling_method=TORFLOW_SCALING, + round_digs=TORFLOW_ROUND_DIG, + ) assert v3bwfile.bw_lines[0].bw == 6 - v3bwfile = V3BWFile.from_results(results, '', '', - state_fpath, - scaling_method=TORFLOW_SCALING, - torflow_cap=0.0001, - round_digs=TORFLOW_ROUND_DIG) + v3bwfile = V3BWFile.from_results( + results, + "", + "", + state_fpath, + scaling_method=TORFLOW_SCALING, + torflow_cap=0.0001, + round_digs=TORFLOW_ROUND_DIG, + ) # Wrong because it should be rounded after clipping assert v3bwfile.bw_lines[0].bw == 1 - v3bwfile = V3BWFile.from_results(results, '', '', - state_fpath, - scaling_method=TORFLOW_SCALING, - torflow_cap=1, - round_digs=TORFLOW_ROUND_DIG) + v3bwfile = V3BWFile.from_results( + results, + "", + "", + state_fpath, + scaling_method=TORFLOW_SCALING, + torflow_cap=1, + round_digs=TORFLOW_ROUND_DIG, + ) assert v3bwfile.bw_lines[0].bw == 123 - v3bwfile = V3BWFile.from_results(results, '', '', - state_fpath, - scaling_method=TORFLOW_SCALING, - torflow_cap=1, - round_digs=PROP276_ROUND_DIG) + v3bwfile = V3BWFile.from_results( + results, + "", + "", + state_fpath, + scaling_method=TORFLOW_SCALING, + torflow_cap=1, + round_digs=PROP276_ROUND_DIG, + ) assert v3bwfile.bw_lines[0].bw == 120
def test_torflow_scale_no_desc_bw_avg(datadir, conf, caplog): - state_fpath = conf['paths']['state_fpath'] + state_fpath = conf["paths"]["state_fpath"] results = load_result_file(str(datadir.join("results_no_desc_bw_avg.txt"))) caplog.set_level(logging.DEBUG) - v3bwfile = V3BWFile.from_results(results, '', '', state_fpath) + v3bwfile = V3BWFile.from_results(results, "", "", state_fpath) assert v3bwfile.bw_lines[0].bw == 26
def test_torflow_scale_no_desc_bw_obs(datadir, conf, caplog): - state_fpath = conf['paths']['state_fpath'] + state_fpath = conf["paths"]["state_fpath"] results = load_result_file(str(datadir.join("results_no_desc_bw_obs.txt"))) caplog.set_level(logging.DEBUG) - v3bwfile = V3BWFile.from_results(results, '', '', state_fpath) + v3bwfile = V3BWFile.from_results(results, "", "", state_fpath) assert v3bwfile.bw_lines[0].bw == 30
def test_torflow_scale_no_desc_bw_avg_obs(datadir, conf, caplog): - state_fpath = conf['paths']['state_fpath'] + state_fpath = conf["paths"]["state_fpath"] results = load_result_file( str(datadir.join("results_no_desc_bw_avg_obs.txt")) ) caplog.set_level(logging.DEBUG) - v3bwfile = V3BWFile.from_results(results, '', '', state_fpath) + v3bwfile = V3BWFile.from_results(results, "", "", state_fpath) assert v3bwfile.bw_lines[0].bw == 30
def test_torflow_scale_no_consensus_bw(datadir, conf, caplog): - state_fpath = conf['paths']['state_fpath'] - results = load_result_file(str( - datadir.join("results_no_consensus_bw.txt")) + state_fpath = conf["paths"]["state_fpath"] + results = load_result_file( + str(datadir.join("results_no_consensus_bw.txt")) ) caplog.set_level(logging.DEBUG) - v3bwfile = V3BWFile.from_results(results, '', '', state_fpath) + v3bwfile = V3BWFile.from_results(results, "", "", state_fpath) assert v3bwfile.bw_lines[0].bw == 26
def test_torflow_scale_0_consensus_bw(datadir, conf, caplog): - state_fpath = conf['paths']['state_fpath'] + state_fpath = conf["paths"]["state_fpath"] results = load_result_file(str(datadir.join("results_0_consensus_bw.txt"))) caplog.set_level(logging.DEBUG) - v3bwfile = V3BWFile.from_results(results, '', '', state_fpath) + v3bwfile = V3BWFile.from_results(results, "", "", state_fpath) assert v3bwfile.bw_lines[0].bw == 26
@@ -423,7 +482,7 @@ def test_results_away_each_other(datadir): # the relay is excluded bwl, reason = V3BWLine.from_results(values, secs_away=secs_away, min_num=2) assert bwl.relay_recent_measurements_excluded_near_count == 2 - assert reason == 'recent_measurements_excluded_near_count' + assert reason == "recent_measurements_excluded_near_count" assert bwl.vote == 0 assert bwl.unmeasured == 1
@@ -445,7 +504,7 @@ def test_results_away_each_other(datadir): # There is only 1 result, the relay is excluded bwl, reason = V3BWLine.from_results(values, min_num=2) assert bwl.relay_recent_measurements_excluded_few_count == 1 - assert reason == 'recent_measurements_excluded_few_count' + assert reason == "recent_measurements_excluded_few_count" assert bwl.vote == 0 assert bwl.unmeasured == 1
@@ -456,11 +515,13 @@ def test_results_away_each_other(datadir): def test_measured_progress_stats(datadir): number_consensus_relays = 3 bw_lines_raw = [] - statsd_exp = {'percent_eligible_relays': 100, - 'minimum_percent_eligible_relays': 60, - 'number_consensus_relays': 3, - 'minimum_number_eligible_relays': 2, - 'number_eligible_relays': 3} + statsd_exp = { + "percent_eligible_relays": 100, + "minimum_percent_eligible_relays": 60, + "number_consensus_relays": 3, + "minimum_number_eligible_relays": 2, + "number_eligible_relays": 3, + } min_perc_reached_before = None results = load_result_file(str(datadir.join("results_away.txt"))) for fp, values in results.items(): @@ -472,18 +533,22 @@ def test_measured_progress_stats(datadir): bw_lines = V3BWFile.bw_torflow_scale(bw_lines_raw) assert len(bw_lines) == 3 statsd, success = V3BWFile.measured_progress_stats( - len(bw_lines), number_consensus_relays, min_perc_reached_before) + len(bw_lines), number_consensus_relays, min_perc_reached_before + ) assert success assert statsd == statsd_exp number_consensus_relays = 6 statsd, success = V3BWFile.measured_progress_stats( - len(bw_lines), number_consensus_relays, min_perc_reached_before) + len(bw_lines), number_consensus_relays, min_perc_reached_before + ) assert not success - statsd_exp = {'percent_eligible_relays': 50, - 'minimum_percent_eligible_relays': 60, - 'number_consensus_relays': 6, - 'minimum_number_eligible_relays': 4, - 'number_eligible_relays': 3} + statsd_exp = { + "percent_eligible_relays": 50, + "minimum_percent_eligible_relays": 60, + "number_consensus_relays": 6, + "minimum_number_eligible_relays": 4, + "number_eligible_relays": 3, + } assert statsd_exp == statsd
@@ -499,46 +564,50 @@ def test_update_progress(datadir, tmpdir): if line is not None: bw_lines_raw.append(line) bwfile = V3BWFile(header, []) - bwfile.update_progress(len(bw_lines_raw), header, number_consensus_relays, - state) - assert header.percent_eligible_relays == '50' - assert state.get('min_perc_reached') is None + bwfile.update_progress( + len(bw_lines_raw), header, number_consensus_relays, state + ) + assert header.percent_eligible_relays == "50" + assert state.get("min_perc_reached") is None # Test that the headers are also included when there are enough eligible # relays number_consensus_relays = 3 header = V3BWHeader(str(now_unixts())) - bwfile.update_progress(len(bw_lines_raw), header, number_consensus_relays, - state) - assert state.get('min_perc_reached') == now_isodt_str() - assert header.minimum_number_eligible_relays == '2' + bwfile.update_progress( + len(bw_lines_raw), header, number_consensus_relays, state + ) + assert state.get("min_perc_reached") == now_isodt_str() + assert header.minimum_number_eligible_relays == "2" assert header.minimum_percent_eligible_relays == str(MIN_REPORT) - assert header.number_consensus_relays == '3' - assert header.number_eligible_relays == '3' - assert header.percent_eligible_relays == '100' + assert header.number_consensus_relays == "3" + assert header.number_eligible_relays == "3" + assert header.percent_eligible_relays == "100"
def test_time_measure_half_network(caplog): - header = V3BWHeader(timestamp_l, - file_created=file_created, - generator_started=generator_started, - earliest_bandwidth=earliest_bandwidth) - header.number_consensus_relays = '6500' - header.number_eligible_relays = '4000' + header = V3BWHeader( + timestamp_l, + file_created=file_created, + generator_started=generator_started, + earliest_bandwidth=earliest_bandwidth, + ) + header.number_consensus_relays = "6500" + header.number_eligible_relays = "4000" caplog.set_level(logging.INFO) header.add_time_report_half_network() - assert header.time_to_report_half_network == '70200' # 19.5h + assert header.time_to_report_half_network == "70200" # 19.5h expected_log = "Estimated time to measure the network: 39 hours." # 19.5*2 assert caplog.records[-1].getMessage() == expected_log
-@mock.patch.object(V3BWFile, 'read_number_consensus_relays') +@mock.patch.object(V3BWFile, "read_number_consensus_relays") def test_set_under_min_report(mock_consensus, conf, datadir): # The number of relays (1) is the same as the ones in the consensus, # therefore there is no any relay excluded and under_min_report is not set. mock_consensus.return_value = 1 - state_fpath = conf['paths']['state_fpath'] + state_fpath = conf["paths"]["state_fpath"] results = load_result_file(str(datadir.join("results.txt"))) - v3bwfile = V3BWFile.from_results(results, '', '', state_fpath) + v3bwfile = V3BWFile.from_results(results, "", "", state_fpath) bwl = v3bwfile.bw_lines[0] assert not hasattr(bwl, "vote") assert not hasattr(bwl, "under_min_report") @@ -549,7 +618,7 @@ def test_set_under_min_report(mock_consensus, conf, datadir): # and unmeasured was also set to 1. # After filtering the relay is excluded because there's only 1 success # result and it should have at least 2 (min_num) - v3bwfile = V3BWFile.from_results(results, '', '', state_fpath, min_num=2) + v3bwfile = V3BWFile.from_results(results, "", "", state_fpath, min_num=2) bwl = v3bwfile.bw_lines[0] assert bwl.vote == 0 assert bwl.under_min_report == 1 @@ -559,7 +628,7 @@ def test_set_under_min_report(mock_consensus, conf, datadir): # The number of relays after scaling is than the 60% in the network, # therefore the relays are excluded and under_min_report is set to 1. mock_consensus.return_value = 3 - v3bwfile = V3BWFile.from_results(results, '', '', state_fpath) + v3bwfile = V3BWFile.from_results(results, "", "", state_fpath) bwl = v3bwfile.bw_lines[0] assert bwl.vote == 0 assert bwl.under_min_report == 1 @@ -567,57 +636,57 @@ def test_set_under_min_report(mock_consensus, conf, datadir):
def test_generator_started(root_data_path, datadir): - state_fpath = os.path.join(root_data_path, '.sbws/state.dat') + state_fpath = os.path.join(root_data_path, ".sbws/state.dat") # The method is correct assert "2020-02-29T10:00:00" == V3BWHeader.generator_started_from_file( state_fpath ) # `results` does not matter here, using them to not have an empty list. results = load_result_file(str(datadir.join("results.txt"))) - header = V3BWHeader.from_results(results, '', '', state_fpath) + header = V3BWHeader.from_results(results, "", "", state_fpath) # And the header is correct assert "2020-02-29T10:00:00" == header.generator_started
def test_recent_consensus_count(root_data_path, datadir): # This state has recent_consensus_count - state_fpath = os.path.join(root_data_path, '.sbws/state.dat') + state_fpath = os.path.join(root_data_path, ".sbws/state.dat") assert "1" == V3BWHeader.consensus_count_from_file(state_fpath) # `results` does not matter here, using them to not have an empty list. results = load_result_file(str(datadir.join("results.txt"))) - header = V3BWHeader.from_results(results, '', '', state_fpath) + header = V3BWHeader.from_results(results, "", "", state_fpath) assert "1" == header.recent_consensus_count
@pytest.mark.skip(reason="increment_recent_measurement_attempt() disabled") def test_recent_measurement_attempt_count(root_data_path, datadir): - state_fpath = os.path.join(root_data_path, '.sbws/state.dat') + state_fpath = os.path.join(root_data_path, ".sbws/state.dat") assert 15 == V3BWHeader.recent_measurement_attempt_count_from_file( state_fpath ) # `results` does not matter here, using them to not have an empty list. results = load_result_file(str(datadir.join("results.txt"))) - header = V3BWHeader.from_results(results, '', '', state_fpath) + header = V3BWHeader.from_results(results, "", "", state_fpath) assert "15" == header.recent_measurement_attempt_count
def test_recent_priority_list_count(root_data_path, datadir): # This state has recent_priority_list - state_fpath = os.path.join(root_data_path, '.sbws/state.dat') + state_fpath = os.path.join(root_data_path, ".sbws/state.dat") assert 1 == V3BWHeader.recent_priority_list_count_from_file(state_fpath) # `results` does not matter here, using them to don't have an empty list. results = load_result_file(str(datadir.join("results.txt"))) - header = V3BWHeader.from_results(results, '', '', state_fpath) + header = V3BWHeader.from_results(results, "", "", state_fpath) assert "1" == header.recent_priority_list_count
def test_recent_priority_relay_count(root_data_path, datadir): # This state has recent_priority_relay_count - state_fpath = os.path.join(root_data_path, '.sbws/state.dat') + state_fpath = os.path.join(root_data_path, ".sbws/state.dat") assert 15 == V3BWHeader.recent_priority_relay_count_from_file(state_fpath) # `results` does not matter here, using them to don't have an empty list. results = load_result_file(str(datadir.join("results.txt"))) - header = V3BWHeader.from_results(results, '', '', state_fpath) + header = V3BWHeader.from_results(results, "", "", state_fpath) assert "15" == header.recent_priority_relay_count
diff --git a/tests/unit/test_bwfile_health.py b/tests/unit/test_bwfile_health.py index 372dbae..c6fefc0 100644 --- a/tests/unit/test_bwfile_health.py +++ b/tests/unit/test_bwfile_health.py @@ -5,18 +5,18 @@ from sbws.lib.bwfile_health import BwFile
def test_bwfile_health(root_data_path): - bwfile = BwFile.load(os.path.join( - root_data_path, "2020-03-22-08-35-00-bandwidth" - )) + bwfile = BwFile.load( + os.path.join(root_data_path, "2020-03-22-08-35-00-bandwidth") + ) assert bwfile.header.is_correct assert bwfile.are_bwlines_correct assert bwfile.is_correct
def test_bwlines_health(capsys, root_data_path): - bwfile = BwFile.load(os.path.join( - root_data_path, "2020-03-22-08-35-00-bandwidth" - )) + bwfile = BwFile.load( + os.path.join(root_data_path, "2020-03-22-08-35-00-bandwidth") + ) out = ( "\nrelay_recent_measurement_attempt_count <= relay_recent_priority_list_count,\n" # noqa "True\n" diff --git a/tests/unit/util/test_config.py b/tests/unit/util/test_config.py index 4ebc703..48b784f 100644 --- a/tests/unit/util/test_config.py +++ b/tests/unit/util/test_config.py @@ -10,269 +10,329 @@ class PseudoSection: self.maxi = maxi
def getfloat(self, key): - assert key == self.key, 'But in tests; key should exist' + assert key == self.key, "But in tests; key should exist" return float(self.value)
def getint(self, key): - assert key == self.key, 'But in tests; key should exist' + assert key == self.key, "But in tests; key should exist" return int(self.value)
def test_validate_fingerprint(): fp_len = 40 bads = [ - 'A' * (fp_len - 1), 'A' * (fp_len + 1), - '', 'A' * (1000000), - 'a' * fp_len, 'O' * fp_len - ] - goods = [ - 'A' * fp_len, - ''.join(list('0123456789ABCDEF' * 3)[0:fp_len]) + "A" * (fp_len - 1), + "A" * (fp_len + 1), + "", + "A" * (1000000), + "a" * fp_len, + "O" * fp_len, ] + goods = ["A" * fp_len, "".join(list("0123456789ABCDEF" * 3)[0:fp_len])] for fp in bads: - d = {'': fp} - valid, reason = con._validate_fingerprint(d, '') - assert not valid, 'Fingerprint {} should not have passed '\ - 'validation'.format(fp) + d = {"": fp} + valid, reason = con._validate_fingerprint(d, "") + assert ( + not valid + ), "Fingerprint {} should not have passed " "validation".format(fp) for fp in goods: - d = {'': fp} - valid, reason = con._validate_fingerprint(d, '') - assert valid, 'Fingerprint {} should have passed, but didn't '\ - 'because {}'.format(fp, reason) + d = {"": fp} + valid, reason = con._validate_fingerprint(d, "") + assert valid, ( + "Fingerprint {} should have passed, but didn't " + "because {}".format(fp, reason) + )
def test_validate_int_simple(): bads = [ - PseudoSection('', 'NotAInt'), - PseudoSection('', '-0.1'), PseudoSection('', '0.1'), + PseudoSection("", "NotAInt"), + PseudoSection("", "-0.1"), + PseudoSection("", "0.1"), ] goods = [ - PseudoSection('', '0'), - PseudoSection('', '1'), PseudoSection('', '-1'), - PseudoSection('', '100000000'), PseudoSection('', '-1000000000'), - PseudoSection('', '+0'), PseudoSection('', '-0'), + PseudoSection("", "0"), + PseudoSection("", "1"), + PseudoSection("", "-1"), + PseudoSection("", "100000000"), + PseudoSection("", "-1000000000"), + PseudoSection("", "+0"), + PseudoSection("", "-0"), ] for sec in bads: valid, reason = con._validate_int(sec, sec.key) - assert not valid, '{} should not have been a valid '\ - 'int'.format(sec.value) + assert not valid, "{} should not have been a valid " "int".format( + sec.value + ) for sec in goods: valid, reason = con._validate_int(sec, sec.key) - assert valid, '{} should have been a valid int, but '\ - 'got: {}'.format(sec.value, reason) + assert valid, "{} should have been a valid int, but " "got: {}".format( + sec.value, reason + )
def test_validate_float_simple(): bads = [ - PseudoSection('', 'NotAFloat'), + PseudoSection("", "NotAFloat"), ] goods = [ - PseudoSection('', '0'), - PseudoSection('', '1'), PseudoSection('', '-1'), - PseudoSection('', '-0.1'), PseudoSection('', '0.1'), - PseudoSection('', '100000000'), PseudoSection('', '-1000000000'), - PseudoSection('', '+0'), PseudoSection('', '-0'), + PseudoSection("", "0"), + PseudoSection("", "1"), + PseudoSection("", "-1"), + PseudoSection("", "-0.1"), + PseudoSection("", "0.1"), + PseudoSection("", "100000000"), + PseudoSection("", "-1000000000"), + PseudoSection("", "+0"), + PseudoSection("", "-0"), ] for sec in bads: valid, reason = con._validate_float(sec, sec.key) - assert not valid, '{} should not have been a valid '\ - 'float'.format(sec.value) + assert not valid, "{} should not have been a valid " "float".format( + sec.value + ) for sec in goods: valid, reason = con._validate_float(sec, sec.key) - assert valid, '{} should have been a valid float, but '\ - 'got: {}'.format(sec.value, reason) + assert ( + valid + ), "{} should have been a valid float, but " "got: {}".format( + sec.value, reason + )
def test_validate_int_min(): goods = [ - PseudoSection('', '0', mini=0), - PseudoSection('', '1', mini=1), PseudoSection('', '-1', mini=-1), + PseudoSection("", "0", mini=0), + PseudoSection("", "1", mini=1), + PseudoSection("", "-1", mini=-1), ] bads = [ - PseudoSection('', '1', mini=2), - PseudoSection('', '0', mini=1), + PseudoSection("", "1", mini=2), + PseudoSection("", "0", mini=1), ] for sec in bads: valid, reason = con._validate_int(sec, sec.key, minimum=sec.mini) - assert not valid, '{} should not have been a valid '\ - 'int'.format(sec.value) + assert not valid, "{} should not have been a valid " "int".format( + sec.value + ) for sec in goods: valid, reason = con._validate_int(sec, sec.key, minimum=sec.mini) - assert valid, '{} should have been a valid int, but '\ - 'got: {}'.format(sec.value, reason) + assert valid, "{} should have been a valid int, but " "got: {}".format( + sec.value, reason + )
def test_validate_float_min(): goods = [ - PseudoSection('', '0', mini=0.0), - PseudoSection('', '0.1', mini=0.1), - PseudoSection('', '-0.1', mini=-0.1), - PseudoSection('', '0.1', mini=-0.1), + PseudoSection("", "0", mini=0.0), + PseudoSection("", "0.1", mini=0.1), + PseudoSection("", "-0.1", mini=-0.1), + PseudoSection("", "0.1", mini=-0.1), ] bads = [ - PseudoSection('', '0.0999999999', mini=0.1), + PseudoSection("", "0.0999999999", mini=0.1), ] for sec in bads: valid, reason = con._validate_float(sec, sec.key, minimum=sec.mini) - assert not valid, '{} should not have been a valid '\ - 'float'.format(sec.value) + assert not valid, "{} should not have been a valid " "float".format( + sec.value + ) for sec in goods: valid, reason = con._validate_float(sec, sec.key, minimum=sec.mini) - assert valid, '{} should have been a valid float, but '\ - 'got: {}'.format(sec.value, reason) + assert ( + valid + ), "{} should have been a valid float, but " "got: {}".format( + sec.value, reason + )
def test_validate_int_max(): goods = [ - PseudoSection('', '0', maxi=0), - PseudoSection('', '1', maxi=1), PseudoSection('', '-1', maxi=-1), - PseudoSection('', '-1', maxi=1), + PseudoSection("", "0", maxi=0), + PseudoSection("", "1", maxi=1), + PseudoSection("", "-1", maxi=-1), + PseudoSection("", "-1", maxi=1), ] bads = [ - PseudoSection('', '2', maxi=1), - PseudoSection('', '1', maxi=0), + PseudoSection("", "2", maxi=1), + PseudoSection("", "1", maxi=0), ] for sec in bads: valid, reason = con._validate_int(sec, sec.key, maximum=sec.maxi) - assert not valid, '{} should not have been a valid '\ - 'int'.format(sec.value) + assert not valid, "{} should not have been a valid " "int".format( + sec.value + ) for sec in goods: valid, reason = con._validate_int(sec, sec.key, maximum=sec.maxi) - assert valid, '{} should have been a valid int, but '\ - 'got: {}'.format(sec.value, reason) + assert valid, "{} should have been a valid int, but " "got: {}".format( + sec.value, reason + )
def test_validate_float_max(): goods = [ - PseudoSection('', '0', maxi=0.0), - PseudoSection('', '0.1', maxi=0.1), - PseudoSection('', '-0.1', maxi=-0.1), - PseudoSection('', '-0.1', maxi=0.1), + PseudoSection("", "0", maxi=0.0), + PseudoSection("", "0.1", maxi=0.1), + PseudoSection("", "-0.1", maxi=-0.1), + PseudoSection("", "-0.1", maxi=0.1), ] bads = [ - PseudoSection('', '0.10000000001', maxi=0.1), + PseudoSection("", "0.10000000001", maxi=0.1), ] for sec in bads: valid, reason = con._validate_float(sec, sec.key, maximum=sec.maxi) - assert not valid, '{} should not have been a valid '\ - 'float'.format(sec.value) + assert not valid, "{} should not have been a valid " "float".format( + sec.value + ) for sec in goods: valid, reason = con._validate_float(sec, sec.key, maximum=sec.maxi) - assert valid, '{} should have been a valid float, but '\ - 'got: {}'.format(sec.value, reason) + assert ( + valid + ), "{} should have been a valid float, but " "got: {}".format( + sec.value, reason + )
def test_validate_bool(): goods = [ - 'on', 'True', 'true', 'yes', - 'off', 'False', 'false', 'no', - '0', '1', + "on", + "True", + "true", + "yes", + "off", + "False", + "false", + "no", + "0", + "1", ] bads = [ - 'onn', 'offf', - '2', '', + "onn", + "offf", + "2", + "", ] for val in goods: conf = ConfigParser() - conf.read_dict({'sec': {}}) - conf['sec']['key'] = val - valid, reason = con._validate_boolean(conf['sec'], 'key') - assert valid, '{} should have been a valid bool, but '\ - 'got: {}'.format(val, reason) + conf.read_dict({"sec": {}}) + conf["sec"]["key"] = val + valid, reason = con._validate_boolean(conf["sec"], "key") + assert ( + valid + ), "{} should have been a valid bool, but " "got: {}".format( + val, reason + ) for val in bads: conf = ConfigParser() - conf.read_dict({'sec': {}}) - conf['sec']['key'] = val - valid, reason = con._validate_boolean(conf['sec'], 'key') - assert not valid, '{} should not have been a valid '\ - 'bool'.format(val) + conf.read_dict({"sec": {}}) + conf["sec"]["key"] = val + valid, reason = con._validate_boolean(conf["sec"], "key") + assert not valid, "{} should not have been a valid " "bool".format(val)
def test_validate_url(): goods = [ - 'https://example.com', 'https://example.com/', - 'https://example.com/foo.bar', - 'https://example.com/foo/bar', - 'https://user@example.com', - 'https://48.290.983.123:4443', - 'http://127.0.0.1:8000' + "https://example.com", + "https://example.com/", + "https://example.com/foo.bar", + "https://example.com/foo/bar", + "https://user@example.com", + "https://48.290.983.123:4443", + "http://127.0.0.1:8000", ] bads = [ - 'ftp://example.com/foo.bar', - 'http://', 'http:///', - 'http://example.com', + "ftp://example.com/foo.bar", + "http://", + "http:///", + "http://example.com", ] for val in goods: - d = {'': val} - valid, reason = con._validate_url(d, '') - assert valid, '{} should have been a valid URL, but '\ - 'got: {}'.format(val, reason) + d = {"": val} + valid, reason = con._validate_url(d, "") + assert valid, "{} should have been a valid URL, but " "got: {}".format( + val, reason + ) for val in bads: - d = {'': val} - valid, reason = con._validate_url(d, '') - assert not valid, '{} should not have been a valid URL'.format(val) + d = {"": val} + valid, reason = con._validate_url(d, "") + assert not valid, "{} should not have been a valid URL".format(val)
def test_nickname(): max_len = 32 goods = [ - 'aaa', 'AAA', 'aAa', 'A1a', '1aA', 'aA1', - '!!!', '!@#', - 'a!A', '!Aa', 'Aa!', - 'a' * max_len, + "aaa", + "AAA", + "aAa", + "A1a", + "1aA", + "aA1", + "!!!", + "!@#", + "a!A", + "!Aa", + "Aa!", + "a" * max_len, ] bads = [ - '', 'a' * (max_len + 1), - '"', ''', + "", + "a" * (max_len + 1), + '"', + "'", ] for nick in goods: - d = {'n': nick} - valid, reason = con._validate_nickname(d, 'n') + d = {"n": nick} + valid, reason = con._validate_nickname(d, "n") assert valid, reason for nick in bads: - d = {'n': nick} - valid, reason = con._validate_nickname(d, 'n') + d = {"n": nick} + valid, reason = con._validate_nickname(d, "n") assert not valid, reason
def test_country(conf): from string import Template - err_tmpl = Template('$sec/$key ($val): $e') + + err_tmpl = Template("$sec/$key ($val): $e")
# Invalid default country code in scanner section - errors = con._validate_country(conf, 'scanner', 'country', err_tmpl) - assert errors[0] == \ - 'scanner/country (AA): Not a valid ISO 3166 alpha-2 country code.' + errors = con._validate_country(conf, "scanner", "country", err_tmpl) + assert ( + errors[0] + == "scanner/country (AA): Not a valid ISO 3166 alpha-2 country code." + )
# Valid country code in scanner section - conf['scanner']['country'] = 'US' - errors = con._validate_country(conf, 'scanner', 'country', err_tmpl) + conf["scanner"]["country"] = "US" + errors = con._validate_country(conf, "scanner", "country", err_tmpl) assert not errors
# No country in destinations.foo section - conf['destinations']['foo'] = 'on' - conf['destinations.foo'] = {} - conf['destinations.foo']['url'] = 'https://foo.bar' + conf["destinations"]["foo"] = "on" + conf["destinations.foo"] = {} + conf["destinations.foo"]["url"] = "https://foo.bar" errors = con._validate_country( - conf, 'destinations.foo', 'country', err_tmpl) - assert errors[0] == \ - 'destinations.foo/country (None): ' \ - 'Missing country in configuration file.' + conf, "destinations.foo", "country", err_tmpl + ) + assert ( + errors[0] == "destinations.foo/country (None): " + "Missing country in configuration file." + )
# Valid country in destinations.foo section - conf['destinations.foo']['url'] = 'US' - errors = con._validate_country(conf, 'scanner', 'country', err_tmpl) + conf["destinations.foo"]["url"] = "US" + errors = con._validate_country(conf, "scanner", "country", err_tmpl) assert not errors
def test_config_arg_provided_but_no_found(args, conf): - args.config = 'non_existing_file' + args.config = "non_existing_file" user_conf = con._get_user_config(args, conf) # since the user configuration is not found, it is the same as conf assert conf.__dict__.items() == user_conf.__dict__.items()
def test_config_arg_provided(args, conf, datadir): - args.config = datadir.join('user_sbws.ini') + args.config = datadir.join("user_sbws.ini") user_conf = con._get_user_config(args, conf) - assert user_conf['paths']['sbws_home'] == '/tmp/.sbws' + assert user_conf["paths"]["sbws_home"] == "/tmp/.sbws" diff --git a/tests/unit/util/test_state.py b/tests/unit/util/test_state.py index d08a4a9..f8a0072 100644 --- a/tests/unit/util/test_state.py +++ b/tests/unit/util/test_state.py @@ -1,76 +1,77 @@ from sbws.util.state import State import os + # from tempfile import NamedTemporaryFile as NTF
def test_state_set_allowed_key_types(tmpdir): - state = State(os.path.join(str(tmpdir), 'statefoo')) - attempt_keys = ('k') + state = State(os.path.join(str(tmpdir), "statefoo")) + attempt_keys = "k" for key in attempt_keys: state[key] = 4 assert state[key] == 4
def test_state_set_allowed_value_types(tmpdir): - state = State(os.path.join(str(tmpdir), 'statefoo')) - attempt_vals = (15983, None, True, -1.2, 'loooooool') + state = State(os.path.join(str(tmpdir), "statefoo")) + attempt_vals = (15983, None, True, -1.2, "loooooool") for val in attempt_vals: - state['foo'] = val - assert state['foo'] == val + state["foo"] = val + assert state["foo"] == val
def test_state_del(tmpdir): - state = State(os.path.join(str(tmpdir), 'statefoo')) - d = {'a': 1, 'b': 2, 'c': 3, 'd': 4} + state = State(os.path.join(str(tmpdir), "statefoo")) + d = {"a": 1, "b": 2, "c": 3, "d": 4} for key in d: state[key] = d[key] assert len(state) == len(d)
- del d['a'] - del state['a'] + del d["a"] + del state["a"] assert len(state) == len(d) for key in d: assert d[key] == state[key]
- d['e'] = 5 - state['e'] = 5 - d['e'] = 5.5 - state['e'] = 5.5 + d["e"] = 5 + state["e"] = 5 + d["e"] = 5.5 + state["e"] = 5.5 assert len(state) == len(d)
def test_state_get_len(tmpdir): - state = State(os.path.join(str(tmpdir), 'statefoo')) - d = {'a': 1, 'b': 2, 'c': 3, 'd': 4} + state = State(os.path.join(str(tmpdir), "statefoo")) + d = {"a": 1, "b": 2, "c": 3, "d": 4} for key in d: state[key] = d[key] assert len(state) == len(d)
- del d['a'] - del state['a'] + del d["a"] + del state["a"] assert len(state) == len(d)
- d['e'] = 5 - state['e'] = 5 - d['e'] = 5.5 - state['e'] = 5.5 + d["e"] = 5 + state["e"] = 5 + d["e"] = 5.5 + state["e"] = 5.5 assert len(state) == len(d)
def test_state_contains(tmpdir): - state = State(os.path.join(str(tmpdir), 'statefoo')) - d = {'a': 1, 'b': 2, 'c': 3, 'd': 4} + state = State(os.path.join(str(tmpdir), "statefoo")) + d = {"a": 1, "b": 2, "c": 3, "d": 4} for key in d: state[key] = d[key] - assert 'a' in state - assert 'e' not in state + assert "a" in state + assert "e" not in state
def test_state_iter(tmpdir): - state = State(os.path.join(str(tmpdir), 'statefoo')) + state = State(os.path.join(str(tmpdir), "statefoo")) for key in state: pass - d = {'a': 1, 'b': 2, 'c': 3, 'd': 4} + d = {"a": 1, "b": 2, "c": 3, "d": 4} for key in d: state[key] = d[key] assert set([key for key in state]) == set(d) @@ -78,8 +79,8 @@ def test_state_iter(tmpdir):
def test_two_instances(tmpdir): """Test that 2 different intances don't overwrite each other""" - s1 = State(os.path.join(str(tmpdir), 'state.dat')) - s2 = State(os.path.join(str(tmpdir), 'state.dat')) + s1 = State(os.path.join(str(tmpdir), "state.dat")) + s2 = State(os.path.join(str(tmpdir), "state.dat")) s1["x"] = "foo" s2["y"] = "bar" assert s2["x"] == "foo" @@ -87,7 +88,8 @@ def test_two_instances(tmpdir):
def test_datetime_values(tmpdir): import datetime - state = State(os.path.join(str(tmpdir), 'state.dat')) + + state = State(os.path.join(str(tmpdir), "state.dat")) now = datetime.datetime.utcnow().replace(microsecond=0) state["datetimes"] = now assert now == state["datetimes"] diff --git a/tests/unit/util/test_stem.py b/tests/unit/util/test_stem.py index c2aafe9..e738cfc 100644 --- a/tests/unit/util/test_stem.py +++ b/tests/unit/util/test_stem.py @@ -9,21 +9,24 @@ def test_parse_user_torrc_config_new_keyvalue_options_success(): NumCPUs 1 """ torrc_dict = parse_user_torrc_config({}, config_torrc_extra_lines) - assert torrc_dict == \ - {'Log': 'debug file /tmp/tor-debug.log', 'NumCPUs': '1'} + assert torrc_dict == { + "Log": "debug file /tmp/tor-debug.log", + "NumCPUs": "1", + }
def test_parse_user_torrc_config_existing_keyvalue_options_fail(caplog): - torrc_dict = {'SocksPort': 'auto'} + torrc_dict = {"SocksPort": "auto"} config_torrc_extra_lines = """ SocksPort 9050 """ torrc_dict_new = parse_user_torrc_config( - torrc_dict, config_torrc_extra_lines) + torrc_dict, config_torrc_extra_lines + ) # the new dictionary contains the existing key option and a list with both # the existing value and the new value assert torrc_dict_new != torrc_dict - assert torrc_dict_new == {'SocksPort': ['auto', '9050']} + assert torrc_dict_new == {"SocksPort": ["auto", "9050"]}
def test_parse_user_torrc_config_new_key_option_success(): @@ -31,4 +34,4 @@ def test_parse_user_torrc_config_new_key_option_success(): LongLivedPorts """ torrc_dict = parse_user_torrc_config({}, config_torrc_extra_lines) - assert torrc_dict == {'LongLivedPorts': None} + assert torrc_dict == {"LongLivedPorts": None} diff --git a/tests/unit/util/test_timestamp.py b/tests/unit/util/test_timestamp.py index 37cc9ea..62b32a2 100644 --- a/tests/unit/util/test_timestamp.py +++ b/tests/unit/util/test_timestamp.py @@ -1,12 +1,17 @@ # -*- coding: utf-8 -*- """Test timestamp conversion util functions""" from datetime import datetime, timezone, timedelta -from sbws.util.timestamp import (dt_obj_to_isodt_str, unixts_to_dt_obj, - unixts_to_isodt_str, unixts_to_str, is_old) +from sbws.util.timestamp import ( + dt_obj_to_isodt_str, + unixts_to_dt_obj, + unixts_to_isodt_str, + unixts_to_str, + is_old, +)
-isodt_str = '2018-05-23T12:55:04' -dt_obj = datetime.strptime(isodt_str, '%Y-%m-%dT%H:%M:%S') +isodt_str = "2018-05-23T12:55:04" +dt_obj = datetime.strptime(isodt_str, "%Y-%m-%dT%H:%M:%S") unixts = int(dt_obj.replace(tzinfo=timezone.utc).timestamp())
@@ -32,6 +37,7 @@ def test_is_old(): old_timestamp = datetime.utcnow() - timedelta(days=5) assert is_old(old_timestamp) # A recent timestamp should be at least 1 second newer that the oldest - recent_timestamp = datetime.utcnow() - timedelta(days=5) \ - + timedelta(seconds=1) + recent_timestamp = ( + datetime.utcnow() - timedelta(days=5) + timedelta(seconds=1) + ) assert not is_old(recent_timestamp) diff --git a/tests/unit/util/test_userquery.py b/tests/unit/util/test_userquery.py index d1e4012..3313960 100644 --- a/tests/unit/util/test_userquery.py +++ b/tests/unit/util/test_userquery.py @@ -2,98 +2,102 @@ from unittest.mock import patch from sbws.util.userquery import query_yes_no
-@patch('builtins.input') +@patch("builtins.input") def test_userquery_missing_default_invalid_response(input_mock): - input_mock.side_effect = [''] * 100 + ['k'] * 100 + ['yess'] * 100 +\ - ['no o'] * 100 + input_mock.side_effect = ( + [""] * 100 + ["k"] * 100 + ["yess"] * 100 + ["no o"] * 100 + ) try: - query_yes_no('a?', default=None) + query_yes_no("a?", default=None) except StopIteration: pass else: - assert None, 'Should have looped forever (and StopItration been '\ - 'thrown when we stopped feeding it empty responses)' + assert None, ( + "Should have looped forever (and StopItration been " + "thrown when we stopped feeding it empty responses)" + ) assert input_mock.call_count == 401
-@patch('builtins.input') +@patch("builtins.input") def test_userquery_missing_default_yes_response(input_mock): - input_mock.side_effect = [''] * 100 + ['y'] - assert query_yes_no('a?', default=None) + input_mock.side_effect = [""] * 100 + ["y"] + assert query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
- input_mock.side_effect = [''] * 100 + ['Y'] - assert query_yes_no('a?', default=None) + input_mock.side_effect = [""] * 100 + ["Y"] + assert query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
- input_mock.side_effect = [''] * 100 + ['Yes'] - assert query_yes_no('a?', default=None) + input_mock.side_effect = [""] * 100 + ["Yes"] + assert query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
- input_mock.side_effect = ['k'] * 100 + ['Yes'] - assert query_yes_no('a?', default=None) + input_mock.side_effect = ["k"] * 100 + ["Yes"] + assert query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
- input_mock.side_effect = ['k'] * 100 + ['Yes', 'No'] - assert query_yes_no('a?', default=None) + input_mock.side_effect = ["k"] * 100 + ["Yes", "No"] + assert query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
-@patch('builtins.input') +@patch("builtins.input") def test_userquery_missing_default_no_response(input_mock): - input_mock.side_effect = [''] * 100 + ['n'] - assert not query_yes_no('a?', default=None) + input_mock.side_effect = [""] * 100 + ["n"] + assert not query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
- input_mock.side_effect = [''] * 100 + ['N'] - assert not query_yes_no('a?', default=None) + input_mock.side_effect = [""] * 100 + ["N"] + assert not query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
- input_mock.side_effect = [''] * 100 + ['No'] - assert not query_yes_no('a?', default=None) + input_mock.side_effect = [""] * 100 + ["No"] + assert not query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
- input_mock.side_effect = ['k'] * 100 + ['No'] - assert not query_yes_no('a?', default=None) + input_mock.side_effect = ["k"] * 100 + ["No"] + assert not query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
- input_mock.side_effect = ['k'] * 100 + ['No', 'Yes'] - assert not query_yes_no('a?', default=None) + input_mock.side_effect = ["k"] * 100 + ["No", "Yes"] + assert not query_yes_no("a?", default=None) assert input_mock.call_count == 101 input_mock.reset_mock()
-@patch('builtins.input') +@patch("builtins.input") def test_userquery_yes_default_invalid_response(input_mock): - input_mock.side_effect = [''] * 100 - assert query_yes_no('a?', default='yes') + input_mock.side_effect = [""] * 100 + assert query_yes_no("a?", default="yes") assert input_mock.call_count == 1
-@patch('builtins.input') +@patch("builtins.input") def test_userquery_no_default_invalid_response(input_mock): - input_mock.side_effect = [''] * 100 - assert not query_yes_no('a?', default='no') + input_mock.side_effect = [""] * 100 + assert not query_yes_no("a?", default="no") assert input_mock.call_count == 1
-@patch('builtins.input') +@patch("builtins.input") def test_userquery_bad_default_invalid_response(input_mock): - input_mock.side_effect = [''] * 100 + input_mock.side_effect = [""] * 100 try: - query_yes_no('a?', default='nooo') + query_yes_no("a?", default="nooo") except ValueError: pass else: - assert None, 'Should not have allowed us to specify a bad default '\ - 'value' + assert None, ( + "Should not have allowed us to specify a bad default " "value" + ) assert input_mock.call_count == 0