[tor-commits] [tor/master] Change smartlist_choose_node_by_bandwidth to avoid double

nickm at torproject.org nickm at torproject.org
Tue Sep 11 21:52:39 UTC 2012


commit e106812a778f53760c819ab20a214ac3222b3b15
Author: Nick Mathewson <nickm at torproject.org>
Date:   Thu Aug 9 12:21:37 2012 -0400

    Change smartlist_choose_node_by_bandwidth to avoid double
    
    This should make our preferred solution to #6538 easier to
    implement, avoid a bunch of potential nastiness with excessive
    int-vs-double math, and generally make the code there a little less
    scary.
    
    "But wait!" you say.  "Is it really safe to do this? Won't the
    results come out differently?"
    
    Yes, but not much.  We now round every weighted bandwidth to the
    nearest byte before computing on it.  This will make every node that
    had a fractional part of its weighted bandwidth before either
    slighty more likely or slightly less likely.  Further, the rand_bw
    value was only ever set with integer precision, so it can't
    accurately sample routers with tiny fractional bandwidth values
    anyway.  Finally, doing repeated double-vs-uint64 comparisons is
    just plain sad; it will involve an implicit cast to double, which is
    never a fun thing.
---
 changes/bug6538     |    4 ++++
 configure.in        |    1 +
 src/common/util.c   |   15 +++++++++++++++
 src/common/util.h   |    1 +
 src/or/routerlist.c |   48 ++++++++++++++++++++++++++++--------------------
 5 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/changes/bug6538 b/changes/bug6538
new file mode 100644
index 0000000..1e882eb
--- /dev/null
+++ b/changes/bug6538
@@ -0,0 +1,4 @@
+  o Minor bugfixes:
+    - Switch weighted node selection rule from using a list of doubles
+      to using a list of int64_t. This should make the process slightly
+      easier to debug and maintain. Needed for fix for bug 6538.
diff --git a/configure.in b/configure.in
index 1db7d08..f5f8eac 100644
--- a/configure.in
+++ b/configure.in
@@ -300,6 +300,7 @@ AC_CHECK_FUNCS(
         gmtime_r \
         inet_aton \
         ioctl \
+        llround \
         localtime_r \
         lround \
         memmem \
diff --git a/src/common/util.c b/src/common/util.c
index a0dff2e..60222f4 100644
--- a/src/common/util.c
+++ b/src/common/util.c
@@ -378,6 +378,21 @@ tor_lround(double d)
 #endif
 }
 
+/** Return the 64-bit integer closest to d.  We define this wrapper here so
+ * that not all users of math.h need to use the right incancations to get the
+ * c99 functions. */
+int64_t
+tor_llround(double d)
+{
+#if defined(HAVE_LLROUND)
+  return (int64_t)llround(d);
+#elif defined(HAVE_RINT)
+  return (int64_t)rint(d);
+#else
+  return (int64_t)(d > 0 ? d + 0.5 : ceil(d - 0.5));
+#endif
+}
+
 /** Returns floor(log2(u64)).  If u64 is 0, (incorrectly) returns 0. */
 int
 tor_log2(uint64_t u64)
diff --git a/src/common/util.h b/src/common/util.h
index a2ab0cc..f700e9f 100644
--- a/src/common/util.h
+++ b/src/common/util.h
@@ -161,6 +161,7 @@ void tor_log_mallinfo(int severity);
 /* Math functions */
 double tor_mathlog(double d) ATTR_CONST;
 long tor_lround(double d) ATTR_CONST;
+int64_t tor_llround(double d) ATTR_CONST;
 int tor_log2(uint64_t u64) ATTR_CONST;
 uint64_t round_to_power_of_2(uint64_t u64);
 unsigned round_to_next_multiple_of(unsigned number, unsigned divisor);
diff --git a/src/or/routerlist.c b/src/or/routerlist.c
index 4979b93..382d457 100644
--- a/src/or/routerlist.c
+++ b/src/or/routerlist.c
@@ -1702,12 +1702,12 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
                                            bandwidth_weight_rule_t rule)
 {
   int64_t weight_scale;
-  int64_t rand_bw;
+  uint64_t rand_bw;
   double Wg = -1, Wm = -1, We = -1, Wd = -1;
   double Wgb = -1, Wmb = -1, Web = -1, Wdb = -1;
-  double weighted_bw = 0, unweighted_bw = 0;
-  double *bandwidths;
-  double tmp = 0;
+  uint64_t weighted_bw = 0, unweighted_bw = 0;
+  uint64_t *bandwidths;
+  uint64_t tmp;
   unsigned int i;
   unsigned int i_chosen;
   unsigned int i_has_been_chosen;
@@ -1792,7 +1792,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
   Web /= weight_scale;
   Wdb /= weight_scale;
 
-  bandwidths = tor_malloc_zero(sizeof(double)*smartlist_len(sl));
+  bandwidths = tor_malloc_zero(sizeof(uint64_t)*smartlist_len(sl));
 
   // Cycle through smartlist and total the bandwidth.
   SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) {
@@ -1831,24 +1831,30 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
     } else { // middle
       weight = (is_dir ? Wmb*Wm : Wm);
     }
-
-    bandwidths[node_sl_idx] = weight*this_bw;
-    weighted_bw += weight*this_bw;
+    /* These should be impossible; but overflows here would be bad, so let's
+     * make sure. */
+    if (this_bw < 0)
+      this_bw = 0;
+    if (weight < 0.0)
+      weight = 0.0;
+
+    bandwidths[node_sl_idx] = tor_llround(weight*this_bw + 0.5);
+    weighted_bw += bandwidths[node_sl_idx];
     unweighted_bw += this_bw;
     if (is_me)
-      sl_last_weighted_bw_of_me = weight*this_bw;
+      sl_last_weighted_bw_of_me = bandwidths[node_sl_idx];
   } SMARTLIST_FOREACH_END(node);
 
   /* XXXX this is a kludge to expose these values. */
   sl_last_total_weighted_bw = weighted_bw;
 
   log_debug(LD_CIRC, "Choosing node for rule %s based on weights "
-            "Wg=%f Wm=%f We=%f Wd=%f with total bw %f",
+            "Wg=%f Wm=%f We=%f Wd=%f with total bw "U64_FORMAT,
             bandwidth_weight_rule_to_string(rule),
-            Wg, Wm, We, Wd, weighted_bw);
+            Wg, Wm, We, Wd, U64_PRINTF_ARG(weighted_bw));
 
   /* If there is no bandwidth, choose at random */
-  if (DBL_TO_U64(weighted_bw) == 0) {
+  if (weighted_bw == 0) {
     /* Don't warn when using bridges/relays not in the consensus */
     if (!have_unknown) {
 #define ZERO_BANDWIDTH_WARNING_INTERVAL (15)
@@ -1858,24 +1864,25 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
       if ((msg = rate_limit_log(&zero_bandwidth_warning_limit,
                                 approx_time()))) {
         log_warn(LD_CIRC,
-                 "Weighted bandwidth is %f in node selection for rule %s "
-                 "(unweighted was %f) %s",
-                 weighted_bw, bandwidth_weight_rule_to_string(rule),
-                 unweighted_bw, msg);
+                 "Weighted bandwidth is "U64_FORMAT" in node selection for "
+                 "rule %s (unweighted was "U64_FORMAT") %s",
+                 U64_PRINTF_ARG(weighted_bw),
+                 bandwidth_weight_rule_to_string(rule),
+                 U64_PRINTF_ARG(unweighted_bw), msg);
       }
     }
     tor_free(bandwidths);
     return smartlist_choose(sl);
   }
 
-  rand_bw = crypto_rand_uint64(DBL_TO_U64(weighted_bw));
+  rand_bw = crypto_rand_uint64(weighted_bw);
   rand_bw++; /* crypto_rand_uint64() counts from 0, and we need to count
               * from 1 below. See bug 1203 for details. */
 
   /* Last, count through sl until we get to the element we picked */
   i_chosen = (unsigned)smartlist_len(sl);
   i_has_been_chosen = 0;
-  tmp = 0.0;
+  tmp = 0;
   for (i=0; i < (unsigned)smartlist_len(sl); i++) {
     tmp += bandwidths[i];
     if (tmp >= rand_bw && !i_has_been_chosen) {
@@ -1892,8 +1899,9 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
     --i;
     log_warn(LD_BUG, "Round-off error in computing bandwidth had an effect on "
              " which router we chose. Please tell the developers. "
-             "%f " U64_FORMAT " %f", tmp, U64_PRINTF_ARG(rand_bw),
-             weighted_bw);
+             U64_FORMAT" "U64_FORMAT" "U64_FORMAT,
+             U64_PRINTF_ARG(tmp), U64_PRINTF_ARG(rand_bw),
+             U64_PRINTF_ARG(weighted_bw));
   }
   tor_free(bandwidths);
   return smartlist_get(sl, i);





More information about the tor-commits mailing list