commit c7eda42e58a3fb328def64355e093440d4bca1af Author: Matt Traudt <sirmatt@ksu.edu> Date: Wed Jun 20 14:33:15 2018 -0400
Cap results at generate time instead of measurement time
This required a bump in the result version: ResultSuccess now stores a new field, relay_average_bandwidth, which results written by earlier versions do not have. --- sbws/core/scanner.py | 36 ------------------------------------ sbws/globals.py | 2 +- sbws/lib/resultdump.py | 14 +++++++++++--- sbws/lib/v3bwfile.py | 20 +++++++++++++++++++- 4 files changed, 31 insertions(+), 41 deletions(-)
diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py index 2a3aadc..6af1b0c 100644 --- a/sbws/core/scanner.py +++ b/sbws/core/scanner.py @@ -165,41 +165,6 @@ def _pick_ideal_second_hop(relay, dest, rl, cont, is_exit): return chosen
-def _clamp_bw_results(relay, bw_results): - ''' - If a relay has MaxAdvertisedBandwidth set, they may be capable of some - large amount of bandwidth but prefer if they didn't receive it. We also - could have managed to measure them faster than their {,Relay}BandwidthRate - somehow. - - See https://github.com/pastly/simple-bw-scanner/issues/155 and - https://trac.torproject.org/projects/tor/ticket/8494 - ''' - upper_bound = relay.average_bandwidth - if upper_bound is None: - log.warning( - 'Could not get average bandwidth from %s\'s descriptor. Not ' - 'capping the results for it to some upper bound.', relay.nickname) - return bw_results - capped_count = 0 - new_results = [] - for result in bw_results: - rate = result['amount'] / result['duration'] - if rate > upper_bound: - capped_count += 1 - new_results.append({ - 'amount': int(upper_bound * result['duration']), - 'duration': result['duration']}) - else: - new_results.append(result) - assert len(new_results) == len(bw_results) - if capped_count > 0: - log.debug( - 'Capped %d results to %d for relay %s', capped_count, upper_bound, - relay.nickname) - return new_results - - def measure_relay(args, conf, destinations, cb, rl, relay): s = requests_utils.make_session( cb.controller, conf.getfloat('general', 'http_timeout')) @@ -281,7 +246,6 @@ def measure_relay(args, conf, destinations, cb, rl, relay): ResultErrorStream(relay, circ_fps, dest.url, our_nick, msg=msg), ] cb.close_circuit(circ_id) - bw_results = _clamp_bw_results(relay, bw_results) # Finally: store result return [ ResultSuccess(rtts, bw_results, relay, circ_fps, dest.url, our_nick), diff --git a/sbws/globals.py b/sbws/globals.py index e48b378..3ae7d60 100644 --- a/sbws/globals.py +++ b/sbws/globals.py @@ -4,7 +4,7 @@ import socket
log = logging.getLogger(__name__)
-RESULT_VERSION = 3 +RESULT_VERSION = 4 WIRE_VERSION = 1 SPEC_VERSION = '1.1.0'
diff --git a/sbws/lib/resultdump.py b/sbws/lib/resultdump.py index 32fd1cd..21d7cc0 100644 --- a/sbws/lib/resultdump.py +++ b/sbws/lib/resultdump.py @@ -150,15 +150,18 @@ class Result: class Relay: ''' Implements just enough of a stem RouterStatusEntryV3 for this Result class to be happy ''' - def __init__(self, fingerprint, nickname, address, master_key_ed25519): + def __init__(self, fingerprint, nickname, address, master_key_ed25519, + average_bandwidth=None): self.fingerprint = fingerprint self.nickname = nickname self.address = address self.master_key_ed25519 = master_key_ed25519 + self.average_bandwidth = average_bandwidth
def __init__(self, relay, circ, dest_url, scanner_nick, t=None): self._relay = Result.Relay(relay.fingerprint, relay.nickname, - relay.address, relay.master_key_ed25519) + relay.address, relay.master_key_ed25519, + relay.average_bandwidth) self._circ = circ self._dest_url = dest_url self._scanner = scanner_nick @@ -169,6 +172,10 @@ class Result: raise NotImplementedError()
@property + def relay_average_bandwidth(self): + return self._relay.average_bandwidth + + @property def fingerprint(self): return self._relay.fingerprint
@@ -417,7 +424,7 @@ class ResultSuccess(Result): d['rtts'], d['downloads'], Result.Relay( d['fingerprint'], d['nickname'], d['address'], - d['master_key_ed25519']), + d['master_key_ed25519'], d['relay_average_bandwidth']), d['circ'], d['dest_url'], d['scanner'], t=d['time'])
@@ -426,6 +433,7 @@ class ResultSuccess(Result): d.update({ 'rtts': self.rtts, 'downloads': self.downloads, + 'relay_average_bandwidth': self.relay_average_bandwidth, }) return d
diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py index 5b07f80..64395ef 100644 --- a/sbws/lib/v3bwfile.py +++ b/sbws/lib/v3bwfile.py @@ -307,8 +307,26 @@ class V3BWLine(object): def bw_from_results(results): median_bw = median([dl['amount'] / dl['duration'] for r in results for dl in r.downloads]) + # If a relay has MaxAdvertisedBandwidth set, they may be capable of + # some large amount of bandwidth but prefer if they didn't receive it. + # We also could have managed to measure them faster than their + # {,Relay}BandwidthRate somehow. + # + # See https://github.com/pastly/simple-bw-scanner/issues/155 and + # https://trac.torproject.org/projects/tor/ticket/8494 + # + # Note how this isn't some measured-by-us average of bandwidth. It's + # the first value on the 'bandwidth' line in the relay's server + # descriptor. + relay_average_bw = median([ + r.relay_average_bandwidth for r in results + if r.relay_average_bandwidth is not None]) + if median_bw > relay_average_bw: + bw = relay_average_bw + else: + bw = median_bw # convert to KB and ensure it's at least 1 - bw_kb = max(round(median_bw / 1024), 1) + bw_kb = max(round(bw / 1024), 1) return bw_kb
@staticmethod