[or-cvs] r12440: Mess with the formula for the Guard flag again. Now it requi (in tor/trunk: . doc doc/spec src/common src/or)

nickm at seul.org nickm at seul.org
Thu Nov 8 16:58:59 UTC 2007


Author: nickm
Date: 2007-11-08 11:58:59 -0500 (Thu, 08 Nov 2007)
New Revision: 12440

Modified:
   tor/trunk/
   tor/trunk/ChangeLog
   tor/trunk/doc/TODO
   tor/trunk/doc/spec/dir-spec.txt
   tor/trunk/src/common/container.c
   tor/trunk/src/common/container.h
   tor/trunk/src/or/dirserv.c
   tor/trunk/src/or/or.h
   tor/trunk/src/or/rephist.c
Log:
 r16573 at catbus:  nickm | 2007-11-08 11:57:16 -0500
 Mess with the formula for the Guard flag again.  Now it requires that you be in the most familiar 7/8 of nodes, and have above median wfu for that 7/8th.  See spec for details.  Also, log thresholds better.



Property changes on: tor/trunk
___________________________________________________________________
 svk:merge ticket from /tor/trunk [r16573] on 8246c3cf-6607-4228-993b-4d95d33730f1

Modified: tor/trunk/ChangeLog
===================================================================
--- tor/trunk/ChangeLog	2007-11-08 16:53:52 UTC (rev 12439)
+++ tor/trunk/ChangeLog	2007-11-08 16:58:59 UTC (rev 12440)
@@ -241,6 +241,10 @@
       should exist before trying to replace the current one.
     - Use a more forgiving schedule for retrying failed consensus
       downloads than for other types.
+    - Adjust the guard selection formula that authorities use a little more:
+      require guards to be in the top 7/8 in terms of how long we have
+      known about them, and above the median of those nodes in terms of
+      weighted fractional uptime.
 
   o Minor bugfixes (other directory issues):
     - Correct the implementation of "download votes by digest." Bugfix on

Modified: tor/trunk/doc/TODO
===================================================================
--- tor/trunk/doc/TODO	2007-11-08 16:53:52 UTC (rev 12439)
+++ tor/trunk/doc/TODO	2007-11-08 16:58:59 UTC (rev 12440)
@@ -23,7 +23,7 @@
     after we've picked it". We should write a real proposal for this --
     in 0.2.1.x.
     - Delay the separation of flags till 0.2.1.x. -NM
-    - Let's come up with a good formula for Guard.
+    o Let's come up with a good formula for Guard.
 
 Here's a go:
 

Modified: tor/trunk/doc/spec/dir-spec.txt
===================================================================
--- tor/trunk/doc/spec/dir-spec.txt	2007-11-08 16:53:52 UTC (rev 12439)
+++ tor/trunk/doc/spec/dir-spec.txt	2007-11-08 16:58:59 UTC (rev 12440)
@@ -972,11 +972,11 @@
    "Running" -- A router is 'Running' if the authority managed to connect to
    it successfully within the last 30 minutes.
 
-   "Stable" -- A router is 'Stable' if it is active, and either its
-   Weighted MTBF is at least the median for known active routers or
-   its Weighted MTBF is at least 10 days. Routers are never called Stable if
-   they are running a version of Tor known to drop circuits stupidly.
-   (0.1.1.10-alpha through 0.1.1.16-rc are stupid this way.)
+   "Stable" -- A router is 'Stable' if it is active, and either its Weighted
+   MTBF is at least the median for known active routers or its Weighted MTBF
+   corresponds to at least 7 days. Routers are never called Stable if they are
+   running a version of Tor known to drop circuits stupidly.  (0.1.1.10-alpha
+   through 0.1.1.16-rc are stupid this way.)
 
         To calculate weighted MTBF, compute the weighted mean of the lengths
         of all intervals when the router was observed to be up, weighting
@@ -991,9 +991,9 @@
    either in the top 7/8ths for known active routers or at least 100KB/s.
 
    "Guard" -- A router is a possible 'Guard' if its Weighted Fractional
-   Uptime is at least the median for known active routers, and its bandwidth
-   is either at least the median for known active routers or at least
-   250KB/s. If the total bandwidth of active non-BadExit Exit servers is less
+   Uptime is at least the median for "familiar" active routers, and if
+   its bandwidth is at least median or at least 250KB/s.
+   If the total bandwidth of active non-BadExit Exit servers is less
    than one third of the total bandwidth of all active servers, no Exit is
    listed as a Guard.
 
@@ -1001,6 +1001,9 @@
         of time that the router is up in any given day, weighting so that
         downtime and uptime in the past counts less.
 
+        A node is 'familiar' if 1/8 of all active nodes have appeared more
+        recently than it, OR it has been around for a few weeks.
+
    "Authority" -- A router is called an 'Authority' if the authority
    generating the network-status document believes it is an authority.
 

Modified: tor/trunk/src/common/container.c
===================================================================
--- tor/trunk/src/common/container.c	2007-11-08 16:53:52 UTC (rev 12439)
+++ tor/trunk/src/common/container.c	2007-11-08 16:58:59 UTC (rev 12440)
@@ -1214,4 +1214,5 @@
 IMPLEMENT_ORDER_FUNC(find_nth_time, time_t)
 IMPLEMENT_ORDER_FUNC(find_nth_double, double)
 IMPLEMENT_ORDER_FUNC(find_nth_uint32, uint32_t)
+IMPLEMENT_ORDER_FUNC(find_nth_long, long)
 

Modified: tor/trunk/src/common/container.h
===================================================================
--- tor/trunk/src/common/container.h	2007-11-08 16:53:52 UTC (rev 12439)
+++ tor/trunk/src/common/container.h	2007-11-08 16:58:59 UTC (rev 12440)
@@ -322,6 +322,7 @@
 time_t find_nth_time(time_t *array, int n_elements, int nth);
 double find_nth_double(double *array, int n_elements, int nth);
 uint32_t find_nth_uint32(uint32_t *array, int n_elements, int nth);
+long find_nth_long(long *array, int n_elements, int nth);
 static INLINE int
 median_int(int *array, int n_elements)
 {
@@ -342,6 +343,11 @@
 {
   return find_nth_uint32(array, n_elements, (n_elements-1)/2);
 }
+static INLINE long
+median_long(long *array, int n_elements)
+{
+  return find_nth_long(array, n_elements, (n_elements-1)/2);
+}
 
 #endif
 

Modified: tor/trunk/src/or/dirserv.c
===================================================================
--- tor/trunk/src/or/dirserv.c	2007-11-08 16:53:52 UTC (rev 12439)
+++ tor/trunk/src/or/dirserv.c	2007-11-08 16:58:59 UTC (rev 12440)
@@ -1459,14 +1459,22 @@
  * current guards. */
 #define UPTIME_TO_GUARANTEE_STABLE (3600*24*30)
 /** If a router's MTBF is at least this value, then it is always stable.
- * See above. */
-#define MTBF_TO_GUARANTEE_STABLE (60*60*24*10)
+ * See above.  (Corresponds to about 7 days for current decay rates.) */
+#define MTBF_TO_GUARANTEE_STABLE (60*60*24*5)
 /** Similarly, we protect sufficiently fast nodes from being pushed
  * out of the set of Fast nodes. */
 #define BANDWIDTH_TO_GUARANTEE_FAST (100*1024)
 /** Similarly, every node with sufficient bandwidth can be considered
  * for Guard status. */
 #define BANDWIDTH_TO_GUARANTEE_GUARD (250*1024)
+/** Similarly, every node with at least this much weighted time known can be
+ * considered familiar enough to be a guard.  Corresponds to about 20 days for
+ * current decay rates.
+ */
+#define TIME_KNOWN_TO_GUARANTEE_FAMILIAR (8*24*60*60)
+/** Similarly, every node with sufficient WFU is around enough to be a guard.
+ */
+#define WFU_TO_GUARANTEE_GUARD (0.995)
 
 /* Thresholds for server performance: set by
  * dirserv_compute_performance_thresholds, and used by
@@ -1475,6 +1483,7 @@
 static double stable_mtbf = 0.0;
 static int enough_mtbf_info = 0;
 static double guard_wfu = 0.0;
+static long guard_tk = 0;
 static uint32_t fast_bandwidth = 0;
 static uint32_t guard_bandwidth_including_exits = 0;
 static uint32_t guard_bandwidth_excluding_exits = 0;
@@ -1514,13 +1523,13 @@
     } else {
       double mtbf =
         rep_hist_get_stability(router->cache_info.identity_digest, now);
-      if (mtbf < stable_mtbf && mtbf < MTBF_TO_GUARANTEE_STABLE)
+      if (mtbf < stable_mtbf)
         return 1;
     }
   }
   if (need_capacity) {
     uint32_t bw = router_get_advertised_bandwidth(router);
-    if (bw < fast_bandwidth && bw < BANDWIDTH_TO_GUARANTEE_FAST)
+    if (bw < fast_bandwidth)
       return 1;
   }
   return 0;
@@ -1550,16 +1559,22 @@
 static void
 dirserv_compute_performance_thresholds(routerlist_t *rl)
 {
-  int n_active, n_active_nonexit;
+  int n_active, n_active_nonexit, n_familiar;
   uint32_t *uptimes, *bandwidths, *bandwidths_excluding_exits;
+  long *tks;
   double *mtbfs, *wfus;
   time_t now = time(NULL);
 
+  /* DOCDOC this is a little tricky; comment this function better. */
+
   /* initialize these all here, in case there are no routers */
   stable_uptime = 0;
+  stable_mtbf = 0;
   fast_bandwidth = 0;
   guard_bandwidth_including_exits = 0;
   guard_bandwidth_excluding_exits = 0;
+  guard_tk = 0;
+  guard_wfu = 0;
 
   total_bandwidth = 0;
   total_exit_bandwidth = 0;
@@ -1570,6 +1585,7 @@
   bandwidths_excluding_exits =
     tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers));
   mtbfs = tor_malloc(sizeof(double)*smartlist_len(rl->routers));
+  tks = tor_malloc(sizeof(long)*smartlist_len(rl->routers));
   wfus = tor_malloc(sizeof(double)*smartlist_len(rl->routers));
 
   SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, {
@@ -1579,7 +1595,7 @@
       ri->is_exit = exit_policy_is_general_exit(ri->exit_policy);
       uptimes[n_active] = real_uptime(ri, now);
       mtbfs[n_active] = rep_hist_get_stability(id, now);
-      wfus [n_active] = rep_hist_get_weighted_fractional_uptime(id, now);
+      tks  [n_active] = rep_hist_get_weighted_time_known(id, now);
       bandwidths[n_active] = bw = router_get_advertised_bandwidth(ri);
       total_bandwidth += bw;
       if (ri->is_exit && !ri->is_bad_exit) {
@@ -1595,14 +1611,35 @@
   if (n_active) {
     stable_uptime = median_uint32(uptimes, n_active);
     stable_mtbf = median_double(mtbfs, n_active);
-    guard_wfu = median_double(wfus, n_active);
     fast_bandwidth = find_nth_uint32(bandwidths, n_active, n_active/8);
     /* Now bandwidths is sorted. */
     if (fast_bandwidth < ROUTER_REQUIRED_MIN_BANDWIDTH)
       fast_bandwidth = bandwidths[n_active/4];
     guard_bandwidth_including_exits = bandwidths[(n_active-1)/2];
+    guard_tk = find_nth_long(tks, n_active, n_active/8);
   }
 
+  if (guard_tk > TIME_KNOWN_TO_GUARANTEE_FAMILIAR)
+    guard_tk = TIME_KNOWN_TO_GUARANTEE_FAMILIAR;
+
+  if (fast_bandwidth > BANDWIDTH_TO_GUARANTEE_FAST)
+    fast_bandwidth = BANDWIDTH_TO_GUARANTEE_FAST;
+
+  n_familiar = 0;
+  SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, {
+      if (router_is_active(ri, now)) {
+        const char *id = ri->cache_info.identity_digest;
+        long tk = rep_hist_get_weighted_time_known(id, now);
+        if (tk < guard_tk)
+          continue;
+        wfus[n_familiar++] = rep_hist_get_weighted_fractional_uptime(id, now);
+      }
+    });
+  if (n_familiar)
+    guard_wfu = median_double(wfus, n_familiar);
+  if (guard_wfu > WFU_TO_GUARANTEE_GUARD)
+    guard_wfu = WFU_TO_GUARANTEE_GUARD;
+
   enough_mtbf_info = rep_hist_have_measured_enough_stability();
 
   if (n_active_nonexit) {
@@ -1610,19 +1647,25 @@
       median_uint32(bandwidths_excluding_exits, n_active_nonexit);
   }
 
-  /*XXXX020 Log the other stuff too. */
   log(LOG_INFO, LD_DIRSERV,
-      "Cutoffs: %lus uptime, %lu b/s fast, %lu or %lu b/s guard.",
+      "Cutoffs: For Stable, %lu sec uptime, %lu sec MTBF. "
+      "For Fast: %lu bytes/sec."
+      "For Guard: WFU %.03lf%%, time-known %lu sec, "
+      "and bandwidth %lu or %lu bytes/sec.",
       (unsigned long)stable_uptime,
+      (unsigned long)stable_mtbf,
       (unsigned long)fast_bandwidth,
+      guard_wfu*100,
+      (unsigned long)guard_tk,
       (unsigned long)guard_bandwidth_including_exits,
       (unsigned long)guard_bandwidth_excluding_exits);
 
   tor_free(uptimes);
   tor_free(mtbfs);
-  tor_free(wfus);
   tor_free(bandwidths);
   tor_free(bandwidths_excluding_exits);
+  tor_free(tks);
+  tor_free(wfus);
 }
 
 /** Given a platform string as in a routerinfo_t (possibly null), return a
@@ -1852,9 +1895,11 @@
        router_get_advertised_bandwidth(ri) >=
        (exits_can_be_guards ? guard_bandwidth_including_exits :
         guard_bandwidth_excluding_exits))) {
+    long tk = rep_hist_get_weighted_time_known(
+                                      ri->cache_info.identity_digest, now);
     double wfu = rep_hist_get_weighted_fractional_uptime(
                                       ri->cache_info.identity_digest, now);
-    rs->is_possible_guard = (wfu >= guard_wfu) ? 1 : 0;
+    rs->is_possible_guard = (wfu >= guard_wfu && tk >= guard_tk) ? 1 : 0;
   } else {
     rs->is_possible_guard = 0;
   }

Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h	2007-11-08 16:53:52 UTC (rev 12439)
+++ tor/trunk/src/or/or.h	2007-11-08 16:58:59 UTC (rev 12440)
@@ -3441,6 +3441,7 @@
 time_t rep_hist_downrate_old_runs(time_t now);
 double rep_hist_get_stability(const char *id, time_t when);
 double rep_hist_get_weighted_fractional_uptime(const char *id, time_t when);
+long rep_hist_get_weighted_time_known(const char *id, time_t when);
 int rep_hist_have_measured_enough_stability(void);
 
 void rep_hist_note_used_port(uint16_t port, time_t now);

Modified: tor/trunk/src/or/rephist.c
===================================================================
--- tor/trunk/src/or/rephist.c	2007-11-08 16:53:52 UTC (rev 12439)
+++ tor/trunk/src/or/rephist.c	2007-11-08 16:58:59 UTC (rev 12440)
@@ -384,8 +384,18 @@
   return total / total_weights;
 }
 
-/* Until we've known about you for this long, you simply can't be up. */
-#define MIN_WEIGHTED_TIME_TO_BE_UP (18*60*60)
+/** DOCDOC */
+static long
+get_total_weighted_time(or_history_t *hist, time_t when)
+{
+  long total = hist->total_weighted_time;
+  if (hist->start_of_run) {
+    total += (when - hist->start_of_run);
+  } else if (hist->start_of_downtime) {
+    total += (when - hist->start_of_downtime);
+  }
+  return total;
+}
 
 /** Helper: Return the weighted percent-of-time-online of the router with
  * history <b>hist</b>. */
@@ -402,8 +412,6 @@
   } else if (hist->start_of_downtime) {
     total += (when - hist->start_of_downtime);
   }
-  if (total < MIN_WEIGHTED_TIME_TO_BE_UP)
-    return 0.0;
   return ((double) up) / total;
 }
 
@@ -431,6 +439,22 @@
   return get_weighted_fractional_uptime(hist, when);
 }
 
+/** Return a number representing how long we've known about the router whose
+ * digest is <b>id</b>. Return 0 if the router is unknown.
+ *
+ * Be careful: this measure increases monotonically as we know the router for
+ * longer and longer, but it doesn't increase linearly.
+ */
+long
+rep_hist_get_weighted_time_known(const char *id, time_t when)
+{
+  or_history_t *hist = get_or_history(id);
+  if (!hist)
+    return 0;
+
+  return get_total_weighted_time(hist, when);
+}
+
 /** Return true if we've been measuring MTBFs for long enough to
  * prounounce on Stability. */
 int



More information about the tor-commits mailing list