[torflow/master] Change how circuit failure is used.

2 Dec 2011

commit 83f29ce70d0846206daa76f7ad8ab2cf3f5efbd6
Author: Mike Perry <mikeperry-git@fscked.org>
Date:   Thu Dec 1 20:54:46 2011 -0800

    Change how circuit failure is used.
    
    Let's try computing a second error (circ_error) and adding it to pid_error.
---
 NetworkScanners/BwAuthority/README.spec.txt |   16 +++++++---
 NetworkScanners/BwAuthority/aggregate.py    |   40 +++++++++++++++------------
 2 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/NetworkScanners/BwAuthority/README.spec.txt b/NetworkScanners/BwAuthority/README.spec.txt
index acce106..db5c125 100644
--- a/NetworkScanners/BwAuthority/README.spec.txt
+++ b/NetworkScanners/BwAuthority/README.spec.txt
@@ -369,9 +369,7 @@
 3.1. Modeling Measurement as PID Control
 
    The bandwidth authorities measure F_node: the filtered stream
-   capacity through a given node (filtering is described in Section 1.6)
-   times the circuit success rate of circuit EXTENDs to the node
-   (1.0 - circ_fail_rate).
+   capacity through a given node (filtering is described in Section 1.6).
 
    In PID control, we add in this extra failure rate as a damper, to prevent
    the PID control system from driving nodes to CPU overload. Once nodes
@@ -509,8 +507,16 @@
        retaining PID state.
 
     "bwauthcircs=1"
-       If present, F_node is multiplied by (1.0 - circ_fail_rate)
-       as described in Section 3.1.
+       If present, an additional circ_error value is computed for each
+       node similar to pid_error of Section 3.1. This value is:
+
+          circ_error = (circ_rate - circ_avg_rate)/circ_avg_rate
+
+       Where circ_rate is the EXTEND success rate to the node, and
+       circ_avg_rate is the average EXTEND success rate for the entire
+       node class.
+
+       This error value is then added to pid_error. 
 
     "bwauthbestratio=0"
        If absent, the larger of stream bandwidth vs filtered bandwidth
diff --git a/NetworkScanners/BwAuthority/aggregate.py b/NetworkScanners/BwAuthority/aggregate.py
index 120de07..ccf6436 100755
--- a/NetworkScanners/BwAuthority/aggregate.py
+++ b/NetworkScanners/BwAuthority/aggregate.py
@@ -449,17 +449,13 @@ def main(argv):
     plog("NOTICE", "No scan results yet.")
     sys.exit(1)
 
-  if not cs_junk.use_circ_fails:
-    plog("INFO", "Ignoring circuit failures")
-    for n in nodes.itervalues():
-      n.circ_fail_rate = 0.0
-
   for idhex in nodes.iterkeys():
     if idhex in prev_consensus:
       nodes[idhex].flags = prev_consensus[idhex].flags
 
   true_filt_avg = {}
   true_strm_avg = {}
+  true_circ_avg = {}
 
   if cs_junk.bwauth_pid_control:
     # Penalize nodes for circuit failure: it indicates CPU pressure
@@ -470,19 +466,21 @@ def main(argv):
     if cs_junk.group_by_class:
       for c in ["Guard+Exit", "Guard", "Exit", "Middle"]:
         c_nodes = filter(lambda n: n.node_class() == c, nodes.itervalues())
-        true_filt_avg[c] = sum(map(lambda n: n.filt_bw*(1.0-n.circ_fail_rate),
-                             c_nodes))/float(len(c_nodes))
-        true_strm_avg[c] = sum(map(lambda n: n.strm_bw*(1.0-n.circ_fail_rate),
-                             c_nodes))/float(len(c_nodes))
+        true_filt_avg[c] = sum(map(lambda n: n.filt_bw, c_nodes))/float(len(c_nodes))
+        true_strm_avg[c] = sum(map(lambda n: n.strm_bw, c_nodes))/float(len(c_nodes))
+        true_circ_avg[c] = sum(map(lambda n: (1.0-n.circ_fail_rate),
+                               c_nodes))/float(len(c_nodes))
         plog("INFO", "Network true_filt_avg["+c+"]: "+str(true_filt_avg[c]))
+        plog("INFO", "Network true_circ_avg["+c+"]: "+str(true_circ_avg[c]))
     else:
-      filt_avg = sum(map(lambda n: n.filt_bw*(1.0-n.circ_fail_rate),
-                      nodes.itervalues()))/float(len(nodes))
-      strm_avg = sum(map(lambda n: n.strm_bw*(1.0-n.circ_fail_rate),
-                           nodes.itervalues()))/float(len(nodes))
+      filt_avg = sum(map(lambda n: n.filt_bw, nodes.itervalues()))/float(len(nodes))
+      strm_avg = sum(map(lambda n: n.strm_bw, nodes.itervalues()))/float(len(nodes))
+      circ_avg = sum(map(lambda n: (1.0-n.circ_fail_rate),
+                         nodes.itervalues()))/float(len(nodes))
       for c in ["Guard+Exit", "Guard", "Exit", "Middle"]:
         true_filt_avg[c] = filt_avg
         true_strm_avg[c] = strm_avg
+        true_circ_avg[c] = circ_avg
   else:
     plog("INFO", "PID control disabled")
     filt_avg = sum(map(lambda n: n.filt_bw*(1.0-n.circ_fail_rate),
@@ -535,16 +533,22 @@ def main(argv):
       else:
         n.use_bw = n.ns_bw
 
-      # Penalize nodes for circ failure rate
       if cs_junk.use_best_ratio and n.sbw_ratio > n.fbw_ratio:
-        n.pid_error = (n.strm_bw*(1.0-n.circ_fail_rate) -
-                                  true_strm_avg[n.node_class()]) \
+        n.pid_error = (n.strm_bw - true_strm_avg[n.node_class()]) \
                          / true_strm_avg[n.node_class()]
       else:
-        n.pid_error = (n.filt_bw*(1.0-n.circ_fail_rate) -
-                                  true_filt_avg[n.node_class()]) \
+        n.pid_error = (n.filt_bw - true_filt_avg[n.node_class()]) \
                          / true_filt_avg[n.node_class()]
 
+      # Penalize nodes for circ failure rate
+      if cs_junk.use_circ_fails:
+        circ_error = ((1.0-n.circ_fail_rate) - true_circ_avg[n.node_class()]) \
+                        / true_circ_avg[n.node_class()]
+        # FIXME: Hrmm, should we only penalize for circ failures, or should
+        # we reward for successes, too? Let's try both for now.
+        # if circ_error < 0:
+        n.pid_error += circ_error
+
       if n.idhex in prev_votes.vote_map:
         # If there is a new sample, let's use it for all but guards
         if n.measured_at > prev_votes.vote_map[n.idhex].measured_at:

    

mikeperry＠torproject.org

tags

participants (1)