commit 4729aa8303217b5478a18b75e01f4a3aa7cfde41
Author: juga0 <juga(a)riseup.net>
Date: Sun Mar 17 16:05:04 2019 +0000
chg: v3bwfile: Include excluded relays
in the bandwidth file to know why there excluded relays.
The excluded relays' lines do not have the `bw` KeyValue so that
Tor does not parse them nor include them in a vote.
Closes: #28563
---
sbws/lib/v3bwfile.py | 99 ++++++++++++++++++++++++++++++-----------
tests/unit/lib/test_v3bwfile.py | 25 ++++++++++-
2 files changed, 96 insertions(+), 28 deletions(-)
diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py
index 0a95510..50e353c 100644
--- a/sbws/lib/v3bwfile.py
+++ b/sbws/lib/v3bwfile.py
@@ -116,6 +116,15 @@ BW_KEYVALUES_BASIC = ['node_id', 'bw']
BW_KEYVALUES_FILE = BW_KEYVALUES_BASIC + \
['master_key_ed25519', 'nick', 'rtt', 'time',
'success', 'error_stream', 'error_circ', 'error_misc',
+ # `vote=0` is used for the relays that were excluded to
+ # be reported in the bandwidth file and now they are
+ # reported.
+ # It tells Tor to do not vote on the relay.
+ # `unmeasured=1` is used for the same relays and it is
+ # added in case Tor would vote on them in future versions.
+ # Maybe these keys should not be included for the relays
+ # in which vote=1 and unmeasured=0.
+ 'vote', 'unmeasured',
# Added in #292951
'error_second_relay', 'error_destination']
BW_KEYVALUES_EXTRA_BWS = ['bw_median', 'bw_mean', 'desc_bw_avg', 'desc_bw_bur',
@@ -164,8 +173,11 @@ BANDWIDTH_LINE_KEY_VALUES_MONITOR = [
]
BW_KEYVALUES_EXTRA = BW_KEYVALUES_FILE + BW_KEYVALUES_EXTRA_BWS \
+ BANDWIDTH_LINE_KEY_VALUES_MONITOR
+# NOTE: tech-debt: assign boolean type to vote and unmeasured,
+# when the attributes are defined with a type, as stem does.
BW_KEYVALUES_INT = ['bw', 'rtt', 'success', 'error_stream',
- 'error_circ', 'error_misc'] + BW_KEYVALUES_EXTRA_BWS \
+ 'error_circ', 'error_misc', 'vote', 'unmeasured'] \
+ + BW_KEYVALUES_EXTRA_BWS \
+ BANDWIDTH_LINE_KEY_VALUES_MONITOR
BW_KEYVALUES = BW_KEYVALUES_BASIC + BW_KEYVALUES_EXTRA
@@ -465,22 +477,16 @@ class V3BWLine(object):
"""
Create a Bandwidth List line following the spec version 1.X.X.
- :param str node_id:
- :param int bw:
- :param dict kwargs: extra headers. Currently supported:
+ :param str node_id: the relay fingerprint
+ :param int bw: the bandwidth value that directory authorities will include
+ in their votes.
+ :param dict kwargs: extra headers.
- - nickname, str
- - master_key_ed25519, str
- - rtt, int
- - time, str
- - sucess, int
- - error_stream, int
- - error_circ, int
- - error_misc, int
+ .. note:: tech-debt: move node_id and bw to kwargs and just ensure that
+ the required values are in **kwargs
"""
def __init__(self, node_id, bw, **kwargs):
assert isinstance(node_id, str)
- assert isinstance(bw, int)
assert node_id.startswith('$')
self.node_id = node_id
self.bw = bw
@@ -544,29 +550,60 @@ class V3BWLine(object):
# In #28563 will include again these lines, but make them unparseable
# by Tor.
# This might confirm #28042.
+
+ # If the relay is non-`eligible`:
+ # Create a bandwidth line with the relay, but set ``vote=0`` so that
+ # Tor versions with patch #29806 does not vote on the relay.
+ # Set ``bw=1`` so that Tor versions without the patch,
+ # will give the relay low bandwidth.
+ # Include ``unmeasured=1`` in case Tor would vote on unmeasured relays
+ # in future versions.
+ # And return because there are not bandwidth values.
+ # NOTE: the bandwidth values could still be obtained if:
+ # 1. ``ResultError`` will store them
+ # 2. assign ``results_recent = results`` when there is a ``exclusion
+ # reason.
+ # This could be done in a better way as part of a refactor #28684.
+
+ kwargs['vote'] = '0'
+ kwargs['unmeasured'] = '1'
+
+ exclusion_reason = None
kwargs['relay_recent_measurement_exclusion_not_success_count'] = \
len(results) - len(success_results)
if not success_results:
- return None, 'recent_measurement_exclusion_not_success_count'
+ exclusion_reason = 'recent_measurement_exclusion_not_success_count'
+ return (cls(node_id, 1, **kwargs), exclusion_reason)
results_away = \
cls.results_away_each_other(success_results, secs_away)
kwargs['relay_recent_measurement_exclusion_not_distanciated_count'] = \
len(success_results) - len(results_away)
if not results_away:
- return None, 'recent_measurement_exclusion_not_distanciated_count'
+ exclusion_reason = \
+ 'recent_measurement_exclusion_not_distanciated_count'
+ return (cls(node_id, 1, **kwargs), exclusion_reason)
# log.debug("Results away from each other: %s",
# [unixts_to_isodt_str(r.time) for r in results_away])
results_recent = cls.results_recent_than(results_away, secs_recent)
kwargs['relay_recent_measurement_exclusion_not_recent_count'] = \
len(results_away) - len(results_recent)
if not results_recent:
- return None, 'recent_measurement_exclusion_not_recent_count'
+ exclusion_reason = \
+ 'recent_measurement_exclusion_not_recent_count'
+ return (cls(node_id, 1, **kwargs), exclusion_reason)
if not len(results_recent) >= min_num:
kwargs['relay_recent_measurement_exclusion_not_min_num_count'] = \
len(results_recent)
# log.debug('The number of results is less than %s', min_num)
- return None, 'recent_measurement_exclusion_not_min_num_count'
+ exclusion_reason = \
+ 'recent_measurement_exclusion_not_min_num_count'
+ return (cls(node_id, 1, **kwargs), exclusion_reason)
+
+ # For any line not excluded, do not include vote and unmeasured
+ # KeyValues
+ del kwargs['vote']
+ del kwargs['unmeasured']
rtt = cls.rtt_from_results(results_recent)
if rtt:
@@ -589,7 +626,7 @@ class V3BWLine(object):
kwargs['desc_bw_obs_mean'] = \
cls.desc_bw_obs_mean_from_results(results_recent)
bwl = cls(node_id, bw, **kwargs)
- return bwl, None
+ return bwl, exclusion_reason
@classmethod
def from_data(cls, data, fingerprint):
@@ -623,7 +660,7 @@ class V3BWLine(object):
return results
# log.debug("Results are NOT away from each other in at least %ss: %s",
# secs_away, [unixts_to_isodt_str(r.time) for r in results])
- return None
+ return []
@staticmethod
def results_recent_than(results, secs_recent=None):
@@ -793,6 +830,7 @@ class V3BWFile(object):
header = V3BWHeader.from_results(results, scanner_country,
destinations_countries, state_fpath)
bw_lines_raw = []
+ bw_lines_excluded = []
number_consensus_relays = cls.read_number_consensus_relays(
consensus_path)
state = State(state_fpath)
@@ -811,9 +849,13 @@ class V3BWFile(object):
# log.debug("Relay fp %s", fp)
line, reason = V3BWLine.from_results(values, secs_recent,
secs_away, min_num)
- if line is not None:
+ # If there is no reason it means the line will not be excluded.
+ if not reason:
bw_lines_raw.append(line)
else:
+ # Store the excluded lines to include them in the bandwidth
+ # file.
+ bw_lines_excluded.append(line)
exclusion_dict[reason] = exclusion_dict.get(reason, 0) + 1
# Add the headers with the number of excluded relays by reason
header.add_relays_excluded_counters(exclusion_dict)
@@ -823,7 +865,9 @@ class V3BWFile(object):
"there is not any. Scaling can not be applied.")
cls.update_progress(
cls, bw_lines_raw, header, number_consensus_relays, state)
- return cls(header, [])
+ # Create the bandwidth file with the excluded lines that does not
+ # have ``bw`` attribute
+ return cls(header, bw_lines_excluded)
if scaling_method == SBWS_SCALING:
bw_lines = cls.bw_sbws_scale(bw_lines_raw, scale_constant)
cls.warn_if_not_accurate_enough(bw_lines, scale_constant)
@@ -839,7 +883,8 @@ class V3BWFile(object):
# log.debug(bw_lines[-1])
# Not using the result for now, just warning
cls.is_max_bw_diff_perc_reached(bw_lines, max_bw_diff_perc)
- f = cls(header, bw_lines)
+ # Include excluded lines that does not have ``bw`` attribute.
+ f = cls(header, bw_lines + bw_lines_excluded)
return f
@classmethod
@@ -1201,7 +1246,7 @@ class V3BWFile(object):
@property
def sum_bw(self):
- return sum([l.bw for l in self.bw_lines])
+ return sum([l.bw for l in self.bw_lines if hasattr(l, 'bw')])
@property
def num(self):
@@ -1209,19 +1254,19 @@ class V3BWFile(object):
@property
def mean_bw(self):
- return mean([l.bw for l in self.bw_lines])
+ return mean([l.bw for l in self.bw_lines if hasattr(l, 'bw')])
@property
def median_bw(self):
- return median([l.bw for l in self.bw_lines])
+ return median([l.bw for l in self.bw_lines if hasattr(l, 'bw')])
@property
def max_bw(self):
- return max([l.bw for l in self.bw_lines])
+ return max([l.bw for l in self.bw_lines if hasattr(l, 'bw')])
@property
def min_bw(self):
- return min([l.bw for l in self.bw_lines])
+ return min([l.bw for l in self.bw_lines if hasattr(l, 'bw')])
@property
def info_stats(self):
diff --git a/tests/unit/lib/test_v3bwfile.py b/tests/unit/lib/test_v3bwfile.py
index 022e070..021eb95 100644
--- a/tests/unit/lib/test_v3bwfile.py
+++ b/tests/unit/lib/test_v3bwfile.py
@@ -55,6 +55,9 @@ header_extra_ls = [timestamp_l, version_l,
header_extra_str = LINE_SEP.join(header_extra_ls) + LINE_SEP
# Line produced without any scaling.
+# unmeasured and vote are not congruent with the exclusion,
+# but `from_data` is only used in the test and doesn't include the
+# arg `min_num`
raw_bwl_str = "bw=56 bw_mean=61423 bw_median=55656 "\
"consensus_bandwidth=600000 consensus_bandwidth_is_unmeasured=False "\
"desc_bw_avg=1000000000 desc_bw_bur=123456 desc_bw_obs_last=524288 "\
@@ -319,27 +322,47 @@ def test_results_away_each_other(datadir):
results = load_result_file(str(datadir.join("results_away.txt")))
# A has 4 results, 3 are success, 2 are 1 day away, 1 is 12h away
values = results["AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"]
+
+ # There is one result excluded, but the relay is not excluded
+ bwl, _ = V3BWLine.from_results(values, secs_away=secs_away, min_num=2)
+ assert not hasattr(bwl, "vote")
+ assert not hasattr(bwl, "unmeasured")
+
success_results = [r for r in values if isinstance(r, ResultSuccess)]
assert len(success_results) >= min_num
results_away = V3BWLine.results_away_each_other(success_results, secs_away)
assert len(results_away) == 3
+
# B has 2 results, 12h away from each other
values = results["BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"]
success_results = [r for r in values if isinstance(r, ResultSuccess)]
assert len(success_results) >= min_num
results_away = V3BWLine.results_away_each_other(success_results, secs_away)
- assert results_away is None
+ assert not results_away
+
+ # Two measurements are excluded and there were only 2,
+ # the relay is excluded
+ bwl, _ = V3BWLine.from_results(values, secs_away=secs_away, min_num=2)
+ assert bwl.vote == '0'
+ assert bwl.unmeasured == '1'
+
secs_away = 43200 # 12h
values = results["BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"]
success_results = [r for r in values if isinstance(r, ResultSuccess)]
assert len(success_results) >= min_num
results_away = V3BWLine.results_away_each_other(success_results, secs_away)
assert len(results_away) == 2
+
# C has 1 result
values = results["CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"]
success_results = [r for r in values if isinstance(r, ResultSuccess)]
assert len(success_results) < min_num
+ # There is only 1 result, the relay is excluded
+ bwl, _ = V3BWLine.from_results(values, min_num=2)
+ assert bwl.vote == '0'
+ assert bwl.unmeasured == '1'
+
def test_measured_progress_stats(datadir):
number_consensus_relays = 3