[tor-commits] [sbws/master] Cap results at generate time instead of measurement time

pastly at torproject.org pastly at torproject.org
Tue Jun 26 15:36:50 UTC 2018


commit c7eda42e58a3fb328def64355e093440d4bca1af
Author: Matt Traudt <sirmatt at ksu.edu>
Date:   Wed Jun 20 14:33:15 2018 -0400

    Cap results at generate time instead of measurement time
    
    This required a bump in the result version: a new field,
    relay_average_bandwidth, was added to the serialized ResultSuccess.
---
 sbws/core/scanner.py   | 36 ------------------------------------
 sbws/globals.py        |  2 +-
 sbws/lib/resultdump.py | 14 +++++++++++---
 sbws/lib/v3bwfile.py   | 20 +++++++++++++++++++-
 4 files changed, 31 insertions(+), 41 deletions(-)

diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py
index 2a3aadc..6af1b0c 100644
--- a/sbws/core/scanner.py
+++ b/sbws/core/scanner.py
@@ -165,41 +165,6 @@ def _pick_ideal_second_hop(relay, dest, rl, cont, is_exit):
     return chosen
 
 
-def _clamp_bw_results(relay, bw_results):
-    '''
-    If a relay has MaxAdvertisedBandwidth set, they may be capable of some
-    large amount of bandwidth but prefer if they didn't receive it. We also
-    could have managed to measure them faster than their {,Relay}BandwidthRate
-    somehow.
-
-    See https://github.com/pastly/simple-bw-scanner/issues/155 and
-    https://trac.torproject.org/projects/tor/ticket/8494
-    '''
-    upper_bound = relay.average_bandwidth
-    if upper_bound is None:
-        log.warning(
-            'Could not get average bandwidth from %s\'s descriptor. Not '
-            'capping the results for it to some upper bound.', relay.nickname)
-        return bw_results
-    capped_count = 0
-    new_results = []
-    for result in bw_results:
-        rate = result['amount'] / result['duration']
-        if rate > upper_bound:
-            capped_count += 1
-            new_results.append({
-                'amount': int(upper_bound * result['duration']),
-                'duration': result['duration']})
-        else:
-            new_results.append(result)
-    assert len(new_results) == len(bw_results)
-    if capped_count > 0:
-        log.debug(
-            'Capped %d results to %d for relay %s', capped_count, upper_bound,
-            relay.nickname)
-    return new_results
-
-
 def measure_relay(args, conf, destinations, cb, rl, relay):
     s = requests_utils.make_session(
         cb.controller, conf.getfloat('general', 'http_timeout'))
@@ -281,7 +246,6 @@ def measure_relay(args, conf, destinations, cb, rl, relay):
             ResultErrorStream(relay, circ_fps, dest.url, our_nick, msg=msg),
         ]
     cb.close_circuit(circ_id)
-    bw_results = _clamp_bw_results(relay, bw_results)
     # Finally: store result
     return [
         ResultSuccess(rtts, bw_results, relay, circ_fps, dest.url, our_nick),
diff --git a/sbws/globals.py b/sbws/globals.py
index e48b378..3ae7d60 100644
--- a/sbws/globals.py
+++ b/sbws/globals.py
@@ -4,7 +4,7 @@ import socket
 
 log = logging.getLogger(__name__)
 
-RESULT_VERSION = 3
+RESULT_VERSION = 4
 WIRE_VERSION = 1
 SPEC_VERSION = '1.1.0'
 
diff --git a/sbws/lib/resultdump.py b/sbws/lib/resultdump.py
index 32fd1cd..21d7cc0 100644
--- a/sbws/lib/resultdump.py
+++ b/sbws/lib/resultdump.py
@@ -150,15 +150,18 @@ class Result:
     class Relay:
         ''' Implements just enough of a stem RouterStatusEntryV3 for this
         Result class to be happy '''
-        def __init__(self, fingerprint, nickname, address, master_key_ed25519):
+        def __init__(self, fingerprint, nickname, address, master_key_ed25519,
+                     average_bandwidth=None):
             self.fingerprint = fingerprint
             self.nickname = nickname
             self.address = address
             self.master_key_ed25519 = master_key_ed25519
+            self.average_bandwidth = average_bandwidth
 
     def __init__(self, relay, circ, dest_url, scanner_nick, t=None):
         self._relay = Result.Relay(relay.fingerprint, relay.nickname,
-                                   relay.address, relay.master_key_ed25519)
+                                   relay.address, relay.master_key_ed25519,
+                                   relay.average_bandwidth)
         self._circ = circ
         self._dest_url = dest_url
         self._scanner = scanner_nick
@@ -169,6 +172,10 @@ class Result:
         raise NotImplementedError()
 
     @property
+    def relay_average_bandwidth(self):
+        return self._relay.average_bandwidth
+
+    @property
     def fingerprint(self):
         return self._relay.fingerprint
 
@@ -417,7 +424,7 @@ class ResultSuccess(Result):
             d['rtts'], d['downloads'],
             Result.Relay(
                 d['fingerprint'], d['nickname'], d['address'],
-                d['master_key_ed25519']),
+                d['master_key_ed25519'], d['relay_average_bandwidth']),
             d['circ'], d['dest_url'], d['scanner'],
             t=d['time'])
 
@@ -426,6 +433,7 @@ class ResultSuccess(Result):
         d.update({
             'rtts': self.rtts,
             'downloads': self.downloads,
+            'relay_average_bandwidth': self.relay_average_bandwidth,
         })
         return d
 
diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py
index 5b07f80..64395ef 100644
--- a/sbws/lib/v3bwfile.py
+++ b/sbws/lib/v3bwfile.py
@@ -307,8 +307,26 @@ class V3BWLine(object):
     def bw_from_results(results):
         median_bw = median([dl['amount'] / dl['duration']
                             for r in results for dl in r.downloads])
+        # If a relay has MaxAdvertisedBandwidth set, it may be capable of
+        # some large amount of bandwidth but would prefer not to receive it.
+        # We also could somehow have measured it faster than its
+        # {,Relay}BandwidthRate.
+        #
+        # See https://github.com/pastly/simple-bw-scanner/issues/155 and
+        # https://trac.torproject.org/projects/tor/ticket/8494
+        #
+        # Note how this isn't some measured-by-us average of bandwidth. It's
+        # the first value on the 'bandwidth' line in the relay's server
+        # descriptor.
+        relay_average_bw = median([
+            r.relay_average_bandwidth for r in results
+            if r.relay_average_bandwidth is not None])
+        if median_bw > relay_average_bw:
+            bw = relay_average_bw
+        else:
+            bw = median_bw
         # convert to KB and ensure it's at least 1
-        bw_kb = max(round(median_bw / 1024), 1)
+        bw_kb = max(round(bw / 1024), 1)
         return bw_kb
 
     @staticmethod





More information about the tor-commits mailing list