commit 090507ee36a4685055a4e829fd5b4a4428aa3b63 Author: juga0 juga@riseup.net Date: Thu Aug 30 12:55:23 2018 +0000
Implement method to scale as Torflow --- sbws/lib/v3bwfile.py | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 189 insertions(+), 2 deletions(-)
diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py index 5ce1423..28be1a9 100644 --- a/sbws/lib/v3bwfile.py +++ b/sbws/lib/v3bwfile.py @@ -481,9 +481,196 @@ class V3BWFile(object): 'allowed', (1 - accuracy_ratio) * 100, margin * 100)
@staticmethod
def bw_torflow_scale(bw_lines, desc_obs_bws=TORFLOW_OBS_LAST,
                     cap=TORFLOW_BW_MARGIN, reverse=False):
    r"""
    Obtain final bandwidth measurements applying Torflow's scaling
    method.

    :param bw_lines: sequence of measured bandwidth lines. Each one must
        provide ``bw_bs_mean``, ``desc_obs_bw_bs_last`` and
        ``desc_obs_bw_bs_mean``. The lines are deep-copied; the ones
        passed in are not modified.
    :param desc_obs_bws: ``TORFLOW_OBS_LAST`` to scale with each line's
        ``desc_obs_bw_bs_last`` or ``TORFLOW_OBS_MEAN`` to scale with
        each line's ``desc_obs_bw_bs_mean``.
    :param cap: currently unused, since Torflow's clipping is not applied
        (see "Limit the bandwidth to a maximum" below). It is kept so
        that the signature stays stable.
    :param bool reverse: passed to ``sorted``; when True the result is
        ordered from the highest to the lowest ``bw``.
    :returns: a new list with the copies of ``bw_lines``, with their
        ``bw`` attribute set to the scaled value and sorted by it.
        An empty ``bw_lines`` gives an empty list.
    :raises ValueError: when ``desc_obs_bws`` is not one of the two
        supported values.

    .. note:: when every ``bw_bs_mean`` is 0 the means are 0 too and
       computing the ratios raises ``ZeroDivisionError``.

    From Torflow's README.spec.txt (section 2.2)::

        In this way, the resulting network status consensus bandwidth
        values are effectively re-weighted proportional to how much
        faster the node was as compared to the rest of the network.

    The variables and steps used in Torflow:

    **strm_bw**::

        The strm_bw field is the average (mean) of all the streams for
        the relay identified by the fingerprint field.
        strm_bw = sum(bw stream x)/|n stream|

    **filt_bw**::

        The filt_bw field is computed similarly, but only the streams
        equal to or greater than the strm_bw are counted in order to
        filter very slow streams due to slow node pairings.

    **filt_sbw and strm_sbw**::

        for rs in RouterStats.query.filter(stats_clause).\
              options(eagerload_all('router.streams.circuit.routers')).all():  # NOQA
          tot_sbw = 0
          sbw_cnt = 0
          for s in rs.router.streams:
            if isinstance(s, ClosedStream):
              skip = False
              #for br in badrouters:
              #  if br != rs:
              #    if br.router in s.circuit.routers:
              #      skip = True
              if not skip:
                # Throw out outliers < mean
                # (too much variance for stddev to filter much)
                if rs.strm_closed == 1 or s.bandwidth() >= rs.sbw:
                  tot_sbw += s.bandwidth()
                  sbw_cnt += 1

        if sbw_cnt: rs.filt_sbw = tot_sbw/sbw_cnt
        else: rs.filt_sbw = None

    **filt_avg, and strm_avg**::

        Once we have determined the most recent measurements for each
        node, we compute an average of the filt_bw fields over all nodes
        we have measured.

    ::

        filt_avg = sum(map(lambda n: n.filt_bw, nodes.itervalues()))/float(len(nodes))  # NOQA
        strm_avg = sum(map(lambda n: n.strm_bw, nodes.itervalues()))/float(len(nodes))  # NOQA

    **true_filt_avg and true_strm_avg**::

        for cl in ["Guard+Exit", "Guard", "Exit", "Middle"]:
            true_filt_avg[cl] = filt_avg
            true_strm_avg[cl] = strm_avg

    In the non-pid case, all types of nodes get the same avg

    **n.fbw_ratio and n.sbw_ratio**::

        for n in nodes.itervalues():
            n.fbw_ratio = n.filt_bw/true_filt_avg[n.node_class()]
            n.sbw_ratio = n.strm_bw/true_strm_avg[n.node_class()]

    **n.ratio**::

        These averages are used to produce ratios for each node by
        dividing the measured value for that node by the network average.

    ::

        # Choose the larger between sbw and fbw
        if n.sbw_ratio > n.fbw_ratio:
            n.ratio = n.sbw_ratio
        else:
            n.ratio = n.fbw_ratio

    **desc_bw**:

    It is the ``observed bandwidth`` in the descriptor, NOT the ``average
    bandwidth``::

        return Router(ns.idhex, ns.nickname, bw_observed, dead, exitpolicy,
        ns.flags, ip, version, os, uptime, published, contact, rate_limited,  # NOQA
        ns.orhash, ns.bandwidth, extra_info_digest, ns.unmeasured)
        self.desc_bw = max(bw,1) # Avoid div by 0

    **new_bw**::

        These ratios are then multiplied by the most recent observed
        descriptor bandwidth we have available for each node, to produce
        a new value for the network status consensus process.

    ::

        n.new_bw = n.desc_bw*n.ratio

    The descriptor observed bandwidth is multiplied by the ratio.
    With empirical results this ratio is ~[0.9, 8.9]

    **Limit the bandwidth to a maximum**::

        NODE_CAP = 0.05

    ::

        if n.new_bw > tot_net_bw*NODE_CAP:
          plog("INFO", "Clipping extremely fast "+n.node_class()+" node "+n.idhex+"="+n.nick+  # NOQA
               " at "+str(100*NODE_CAP)+"% of network capacity ("+
               str(n.new_bw)+"->"+str(int(tot_net_bw*NODE_CAP))+") "+
               " pid_error="+str(n.pid_error)+
               " pid_error_sum="+str(n.pid_error_sum))
          n.new_bw = int(tot_net_bw*NODE_CAP)

    However, tot_net_bw does not seem to be updated when not using pid.
    This clipping would make faster relays to all have the same value.
    It is NOT applied by this method.

    What this method computes can be expressed as:

    .. math::

        bwnew_i &=
            max\left(
                \frac{bw_i}{\mu},
                \frac{min\left(bw_i, \mu\right)}{\mu_f}
            \right)
            \times bwdescobs_i \\
        \mu &= \frac{\sum_{j=1}^{n} bw_j}{n} \\
        \mu_f &= \frac{\sum_{j=1}^{n} min\left(bw_j, \mu\right)}{n}

    where :math:`bw_i` is the line's ``bw_bs_mean`` and
    :math:`bwdescobs_i` the descriptor observed bandwidth selected with
    ``desc_obs_bws``. The result is then divided by 1000, rounded and
    raised to 1 when it would be lower.

    """
    # The attribute to scale with is the same for every line, so resolve
    # (and validate) it once instead of on every iteration. Before, an
    # unsupported value left the local variable unbound and the loop
    # failed with UnboundLocalError.
    if desc_obs_bws == TORFLOW_OBS_LAST:
        desc_obs_attr = 'desc_obs_bw_bs_last'
    elif desc_obs_bws == TORFLOW_OBS_MEAN:
        desc_obs_attr = 'desc_obs_bw_bs_mean'
    else:
        raise ValueError(
            "desc_obs_bws must be TORFLOW_OBS_LAST or TORFLOW_OBS_MEAN, "
            "got %r" % (desc_obs_bws,))
    log.info("Calculating relays' bandwidth using Torflow method.")
    if not bw_lines:
        # Nothing to scale; the means below are not defined without data.
        return []
    bw_lines_tf = copy.deepcopy(bw_lines)
    bws = [line.bw_bs_mean for line in bw_lines]
    # mean (Torflow's strm_avg)
    mu = mean(bws)
    # filtered mean (Torflow's filt_avg)
    muf = mean([min(bw, mu) for bw in bws])
    # bw sum (Torflow's tot_net_bw or tot_sbw), only reported in the log:
    # it would be the base of Torflow's clipping, which is not applied.
    sum_bw = sum(bws)
    log.debug('sum %s', sum_bw)
    log.debug('mu %s', mu)
    log.debug('muf %s', muf)
    for line in bw_lines_tf:
        desc_obs_bw = getattr(line, desc_obs_attr)
        # Choose the larger between the ratio and the filtered ratio,
        # as Torflow does with sbw_ratio and fbw_ratio.
        ratio = max(line.bw_bs_mean / mu,
                    min(line.bw_bs_mean, mu) / muf)
        # Never report less than 1, whatever the measurement was.
        line.bw = max(round(ratio * desc_obs_bw / 1000), 1)
    return sorted(bw_lines_tf, key=lambda x: x.bw, reverse=reverse)
@property def sum_bw(self):