[or-cvs] r11457: Unify all of the divergent median/nth-percentile code in con (in tor/trunk: . src/common src/or)

nickm at seul.org nickm at seul.org
Mon Sep 17 18:27:50 UTC 2007


Author: nickm
Date: 2007-09-17 14:27:49 -0400 (Mon, 17 Sep 2007)
New Revision: 11457

Modified:
   tor/trunk/
   tor/trunk/ChangeLog
   tor/trunk/src/common/container.c
   tor/trunk/src/common/container.h
   tor/trunk/src/or/dirserv.c
   tor/trunk/src/or/dirvote.c
   tor/trunk/src/or/or.h
   tor/trunk/src/or/test.c
Log:
 r14448 at Kushana:  nickm | 2007-09-17 14:26:56 -0400
 Unify all of the divergent median/nth-percentile code in container.[ch]



Property changes on: tor/trunk
___________________________________________________________________
 svk:merge ticket from /tor/trunk [r14448] on c95137ef-5f19-0410-b913-86e773d04f59

Modified: tor/trunk/ChangeLog
===================================================================
--- tor/trunk/ChangeLog	2007-09-17 18:27:43 UTC (rev 11456)
+++ tor/trunk/ChangeLog	2007-09-17 18:27:49 UTC (rev 11457)
@@ -45,6 +45,7 @@
       of a file in memory at once before we write to disk.  Tor, meet stdio.
     - Turn "descriptor store" into a full-fledged type.
     - Move all NT services code into a separate source file.
+    - Unify all code that computes medians, percentile elements, etc.
 
 
 Changes in version 0.1.2.17 - 2007-08-30

Modified: tor/trunk/src/common/container.c
===================================================================
--- tor/trunk/src/common/container.c	2007-09-17 18:27:43 UTC (rev 11456)
+++ tor/trunk/src/common/container.c	2007-09-17 18:27:49 UTC (rev 11457)
@@ -1133,3 +1133,35 @@
   return HT_SIZE(&map->head);
 }
 
+/** declare a function called <b>funcname</b> that acts as a find_nth_XXX
+ * function for an array of type <b>elt_t</b>*.
+ *
+ * NOTE: The implementation kind of sucks: It's O(n log n), whereas finding
+ * the nth element of a list can be done in O(n).  Then again, this
+ * implementation is not in critical path, and it is obviously correct. */
+#define IMPLEMENT_ORDER_FUNC(funcname, elt_t)                   \
+  static int                                                    \
+  _cmp_ ## elt_t(const void *_a, const void *_b)                \
+  {                                                             \
+    const elt_t *a = _a, *b = _b;                               \
+    if (*a<*b)                                                  \
+      return -1;                                                \
+    else if (*a>*b)                                             \
+      return 1;                                                 \
+    else                                                        \
+      return 0;                                                 \
+  }                                                             \
+  elt_t                                                         \
+  funcname(elt_t *array, int n_elements, int nth)               \
+  {                                                             \
+    tor_assert(nth >= 0);                                       \
+    tor_assert(nth < n_elements);                               \
+    qsort(array, n_elements, sizeof(elt_t), _cmp_ ##elt_t);     \
+    return array[nth];                                          \
+  }
+
+IMPLEMENT_ORDER_FUNC(find_nth_int, int)
+IMPLEMENT_ORDER_FUNC(find_nth_time, time_t)
+IMPLEMENT_ORDER_FUNC(find_nth_double, double)
+IMPLEMENT_ORDER_FUNC(find_nth_uint32, uint32_t)
+

Modified: tor/trunk/src/common/container.h
===================================================================
--- tor/trunk/src/common/container.h	2007-09-17 18:27:43 UTC (rev 11456)
+++ tor/trunk/src/common/container.h	2007-09-17 18:27:49 UTC (rev 11457)
@@ -310,5 +310,34 @@
   return b[bit >> BITARRAY_SHIFT] & (1u << (bit & BITARRAY_MASK));
 }
 
+/* These functions, given an <b>array</b> of <b>n_elements</b>, return the
+ * <b>nth</b> lowest element. <b>nth</b>=0 gives the lowest element;
+ * <b>n_elements</b>-1 gives the highest; and (<b>n_elements</b>-1) / 2 gives
+ * the median.  As a side effect, the elements of <b>array</b> are sorted. */
+int find_nth_int(int *array, int n_elements, int nth);
+time_t find_nth_time(time_t *array, int n_elements, int nth);
+double find_nth_double(double *array, int n_elements, int nth);
+uint32_t find_nth_uint32(uint32_t *array, int n_elements, int nth);
+static INLINE int
+median_int(int *array, int n_elements)
+{
+  return find_nth_int(array, n_elements, (n_elements-1)/2);
+}
+static INLINE time_t
+median_time(time_t *array, int n_elements)
+{
+  return find_nth_time(array, n_elements, (n_elements-1)/2);
+}
+static INLINE double
+median_double(double *array, int n_elements)
+{
+  return find_nth_double(array, n_elements, (n_elements-1)/2);
+}
+static INLINE uint32_t
+median_uint32(uint32_t *array, int n_elements)
+{
+  return find_nth_uint32(array, n_elements, (n_elements-1)/2);
+}
+
 #endif
 

Modified: tor/trunk/src/or/dirserv.c
===================================================================
--- tor/trunk/src/or/dirserv.c	2007-09-17 18:27:43 UTC (rev 11456)
+++ tor/trunk/src/or/dirserv.c	2007-09-17 18:27:49 UTC (rev 11457)
@@ -1569,28 +1569,6 @@
   return 0;
 }
 
-/** Helper: returns a tristate based on comparing **(uint32_t**)<b>a</b>
- * to **(uint32_t**)<b>b</b>. */
-static int
-_compare_uint32(const void **a, const void **b)
-{
-  uint32_t first = **(uint32_t **)a, second = **(uint32_t **)b;
-  if (first < second) return -1;
-  if (first > second) return 1;
-  return 0;
-}
-
-/** Helper: returns a tristate based on comparing **(double**)<b>a</b>
- * to **(double**)<b>b</b>. */
-static int
-_compare_double(const void **a, const void **b)
-{
-  double first = **(double **)a, second = **(double **)b;
-  if (first < second) return -1;
-  if (first > second) return 1;
-  return 0;
-}
-
 /** Look through the routerlist, and assign the median uptime of running valid
  * servers to stable_uptime, and the relative bandwidth capacities to
  * fast_bandwidth and guard_bandwidth.  Set total_bandwidth to the total
@@ -1600,7 +1578,9 @@
 static void
 dirserv_compute_performance_thresholds(routerlist_t *rl)
 {
-  smartlist_t *uptimes, *mtbfs, *bandwidths, *bandwidths_excluding_exits;
+  int n_active, n_active_nonexit;
+  uint32_t *uptimes, *bandwidths, *bandwidths_excluding_exits;
+  double *mtbfs;
   time_t now = time(NULL);
 
   /* initialize these all here, in case there are no routers */
@@ -1612,63 +1592,48 @@
   total_bandwidth = 0;
   total_exit_bandwidth = 0;
 
-  uptimes = smartlist_create();
-  mtbfs = smartlist_create();
-  bandwidths = smartlist_create();
-  bandwidths_excluding_exits = smartlist_create();
+  n_active = n_active_nonexit = 0;
+  uptimes = tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers));
+  bandwidths = tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers));
+  bandwidths_excluding_exits =
+    tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers));
+  mtbfs = tor_malloc(sizeof(double)*smartlist_len(rl->routers));
 
   /* XXXX020 we should just use arrays and qsort.  */
   SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, {
     if (router_is_active(ri, now)) {
-      uint32_t *up = tor_malloc(sizeof(uint32_t));
-      uint32_t *bw = tor_malloc(sizeof(uint32_t));
-      uint32_t *mtbf = tor_malloc(sizeof(double));
+      const char *id = ri->cache_info.identity_digest;
+      uint32_t bw;
       ri->is_exit = exit_policy_is_general_exit(ri->exit_policy);
-      *up = (uint32_t) real_uptime(ri, now);
-      smartlist_add(uptimes, up);
-      *mtbf = rep_hist_get_stability(ri->cache_info.identity_digest, now);
-      smartlist_add(mtbfs, mtbf);
-      *bw = router_get_advertised_bandwidth(ri);
-      total_bandwidth += *bw;
+      uptimes[n_active] = real_uptime(ri, now);
+      mtbfs[n_active] = rep_hist_get_stability(id, now);
+      bandwidths[n_active] = bw = router_get_advertised_bandwidth(ri);
+      total_bandwidth += bw;
       if (ri->is_exit && !ri->is_bad_exit) {
-        total_exit_bandwidth += *bw;
+        total_exit_bandwidth += bw;
       } else {
-        uint32_t *bw_not_exit = tor_malloc(sizeof(uint32_t));
-        *bw_not_exit = *bw;
-        smartlist_add(bandwidths_excluding_exits, bw_not_exit);
+        bandwidths_excluding_exits[n_active_nonexit] = bw;
+        ++n_active_nonexit;
       }
-      smartlist_add(bandwidths, bw);
+      ++n_active;
     }
   });
 
-  smartlist_sort(uptimes, _compare_uint32);
-  smartlist_sort(mtbfs, _compare_double);
-  smartlist_sort(bandwidths, _compare_uint32);
-  smartlist_sort(bandwidths_excluding_exits, _compare_uint32);
-
-  if (smartlist_len(uptimes))
-    stable_uptime = *(uint32_t*)smartlist_get(uptimes,
-                                              smartlist_len(uptimes)/2);
-
-  if (smartlist_len(mtbfs))
-    stable_mtbf = *(double*)smartlist_get(mtbfs,
-                                          smartlist_len(mtbfs)/2);
-  enough_mtbf_info = rep_hist_have_measured_enough_stability();
-
-  if (smartlist_len(bandwidths)) {
-    fast_bandwidth = *(uint32_t*)smartlist_get(bandwidths,
-                                               smartlist_len(bandwidths)/8);
+  if (n_active) {
+    stable_uptime = median_uint32(uptimes, n_active);
+    stable_mtbf = median_double(mtbfs, n_active);
+    fast_bandwidth = find_nth_uint32(bandwidths, n_active, n_active/8);
+    /* Now bandwidths is sorted. */
     if (fast_bandwidth < ROUTER_REQUIRED_MIN_BANDWIDTH)
-      fast_bandwidth = *(uint32_t*)smartlist_get(bandwidths,
-                                                 smartlist_len(bandwidths)/4);
-    guard_bandwidth_including_exits =
-      *(uint32_t*)smartlist_get(bandwidths, smartlist_len(bandwidths)/2);
+      fast_bandwidth = bandwidths[n_active/4];
+    guard_bandwidth_including_exits = bandwidths[(n_active-1)/2];
   }
 
-  if (smartlist_len(bandwidths_excluding_exits)) {
+  enough_mtbf_info = rep_hist_have_measured_enough_stability();
+
+  if (n_active_nonexit) {
     guard_bandwidth_excluding_exits =
-      *(uint32_t*)smartlist_get(bandwidths_excluding_exits,
-                                smartlist_len(bandwidths_excluding_exits)/2);
+      median_uint32(bandwidths_excluding_exits, n_active_nonexit);
   }
 
   log(LOG_INFO, LD_DIRSERV,
@@ -1678,14 +1643,10 @@
       (unsigned long)guard_bandwidth_including_exits,
       (unsigned long)guard_bandwidth_excluding_exits);
 
-  SMARTLIST_FOREACH(uptimes, uint32_t *, up, tor_free(up));
-  SMARTLIST_FOREACH(mtbfs, double *, mtbf, tor_free(mtbf));
-  SMARTLIST_FOREACH(bandwidths, uint32_t *, bw, tor_free(bw));
-  SMARTLIST_FOREACH(bandwidths_excluding_exits, uint32_t *, bw, tor_free(bw));
-  smartlist_free(uptimes);
-  smartlist_free(mtbfs);
-  smartlist_free(bandwidths);
-  smartlist_free(bandwidths_excluding_exits);
+  tor_free(uptimes);
+  tor_free(mtbfs);
+  tor_free(bandwidths);
+  tor_free(bandwidths_excluding_exits);
 }
 
 /** Given a platform string as in a routerinfo_t (possibly null), return a

Modified: tor/trunk/src/or/dirvote.c
===================================================================
--- tor/trunk/src/or/dirvote.c	2007-09-17 18:27:43 UTC (rev 11456)
+++ tor/trunk/src/or/dirvote.c	2007-09-17 18:27:49 UTC (rev 11457)
@@ -81,54 +81,6 @@
   return NULL;
 }
 
-/** Helper for sorting a list of time_t*. */
-static int
-_compare_times(const void **_a, const void **_b)
-{
-  const time_t *a = *_a, *b = *_b;
-  if (*a<*b)
-    return -1;
-  else if (*a>*b)
-    return 1;
-  else
-    return 0;
-}
-
-/** Helper for sorting a list of int*. */
-static int
-_compare_ints(const void **_a, const void **_b)
-{
-  const int *a = *_a, *b = *_b;
-  if (*a<*b)
-    return -1;
-  else if (*a>*b)
-    return 1;
-  else
-    return 0;
-}
-
-/** Given a list of one or more time_t*, return the (low) median. */
-/*static*/ time_t
-median_time(smartlist_t *times)
-{
-  int idx;
-  tor_assert(smartlist_len(times));
-  smartlist_sort(times, _compare_times);
-  idx = (smartlist_len(times)-1)/2;
-  return *(time_t*)smartlist_get(times, idx);
-}
-
-/** Given a list of one or more int*, return the (low) median. */
-/*static*/ int
-median_int(smartlist_t *ints)
-{
-  int idx;
-  tor_assert(smartlist_len(ints));
-  smartlist_sort(ints, _compare_ints);
-  idx = (smartlist_len(ints)-1)/2;
-  return *(int*)smartlist_get(ints, idx);
-}
-
 /** Given a vote <b>vote</b> (not a consensus!), return its associated
  * networkstatus_voter_info_t.*/
 static networkstatus_voter_info_t *
@@ -322,11 +274,12 @@
   /* Compute medians of time-related things, and figure out how many
    * routers we might need to talk about. */
   {
-    smartlist_t *va_times = smartlist_create();
-    smartlist_t *fu_times = smartlist_create();
-    smartlist_t *vu_times = smartlist_create();
-    smartlist_t *votesec_list = smartlist_create();
-    smartlist_t *distsec_list = smartlist_create();
+    int n_votes = smartlist_len(votes);
+    time_t *va_times = tor_malloc(n_votes * sizeof(time_t));
+    time_t *fu_times = tor_malloc(n_votes * sizeof(time_t));
+    time_t *vu_times = tor_malloc(n_votes * sizeof(time_t));
+    int *votesec_list = tor_malloc(n_votes * sizeof(int));
+    int *distsec_list = tor_malloc(n_votes * sizeof(int));
     int n_versioning_clients = 0, n_versioning_servers = 0;
     smartlist_t *combined_client_versions = smartlist_create();
     smartlist_t *combined_server_versions = smartlist_create();
@@ -334,11 +287,11 @@
     SMARTLIST_FOREACH(votes, networkstatus_vote_t *, v,
     {
       tor_assert(v->is_vote);
-      smartlist_add(va_times, &v->valid_after);
-      smartlist_add(fu_times, &v->fresh_until);
-      smartlist_add(vu_times, &v->valid_until);
-      smartlist_add(votesec_list, &v->vote_seconds);
-      smartlist_add(distsec_list, &v->dist_seconds);
+      va_times[v_sl_idx] = v->valid_after;
+      fu_times[v_sl_idx] = v->fresh_until;
+      vu_times[v_sl_idx] = v->valid_until;
+      votesec_list[v_sl_idx] = v->vote_seconds;
+      distsec_list[v_sl_idx] = v->dist_seconds;
       if (v->client_versions) {
         smartlist_t *cv = smartlist_create();
         ++n_versioning_clients;
@@ -360,11 +313,11 @@
       SMARTLIST_FOREACH(v->known_flags, const char *, cp,
                         smartlist_add(flags, tor_strdup(cp)));
     });
-    valid_after = median_time(va_times);
-    fresh_until = median_time(fu_times);
-    valid_until = median_time(vu_times);
-    vote_seconds = median_int(votesec_list);
-    dist_seconds = median_int(distsec_list);
+    valid_after = median_time(va_times, n_votes);
+    fresh_until = median_time(fu_times, n_votes);
+    valid_until = median_time(vu_times, n_votes);
+    vote_seconds = median_int(votesec_list, n_votes);
+    dist_seconds = median_int(distsec_list, n_votes);
 
     /*
     SMARTLIST_FOREACH(va_times, int*, i,
@@ -400,11 +353,11 @@
     smartlist_sort_strings(flags);
     smartlist_uniq_strings(flags);
 
-    smartlist_free(va_times);
-    smartlist_free(fu_times);
-    smartlist_free(vu_times);
-    smartlist_free(votesec_list);
-    smartlist_free(distsec_list);
+    tor_free(va_times);
+    tor_free(fu_times);
+    tor_free(vu_times);
+    tor_free(votesec_list);
+    tor_free(distsec_list);
   }
 
   chunks = smartlist_create();

Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h	2007-09-17 18:27:43 UTC (rev 11456)
+++ tor/trunk/src/or/or.h	2007-09-17 18:27:49 UTC (rev 11457)
@@ -2932,8 +2932,6 @@
 const cached_dir_t *dirvote_get_vote(const char *id);
 
 #ifdef DIRVOTE_PRIVATE
-time_t median_time(smartlist_t *times);
-int median_int(smartlist_t *times);
 int networkstatus_check_voter_signature(networkstatus_vote_t *consensus,
                                         networkstatus_voter_info_t *voter,
                                         authority_cert_t *cert);
@@ -3184,6 +3182,7 @@
 
 time_t rep_hist_downrate_old_runs(time_t now);
 double rep_hist_get_stability(const char *id, time_t when);
+double rep_hist_get_weighted_fractional_uptime(const char *id, time_t when);
 int rep_hist_have_measured_enough_stability(void);
 
 void rep_hist_note_used_port(uint16_t port, time_t now);

Modified: tor/trunk/src/or/test.c
===================================================================
--- tor/trunk/src/or/test.c	2007-09-17 18:27:43 UTC (rev 11456)
+++ tor/trunk/src/or/test.c	2007-09-17 18:27:49 UTC (rev 11457)
@@ -2386,42 +2386,29 @@
 }
 
 static void
-test_util_dirvote_helpers(void)
+test_util_order_functions(void)
 {
-  smartlist_t *sl = smartlist_create();
-  int a=12,b=24,c=25,d=60,e=77;
-  time_t v=99, w=150, x=700, y=1000, z=time(NULL);
+  int lst[25], n = 0;
+  //  int a=12,b=24,c=25,d=60,e=77;
 
-  test_assert(y<z);
-  smartlist_add(sl, &a);
-  test_eq(a, median_int(sl)); /* a */
-  smartlist_add(sl, &e);
-  smartlist_shuffle(sl);
-  test_eq(a, median_int(sl)); /* a,e */
-  smartlist_add(sl, &e);
-  smartlist_shuffle(sl);
-  test_eq(e, median_int(sl)); /* a,e,e */
-  smartlist_add(sl, &b);
-  test_eq(b, median_int(sl)); /* a,b,e,e */
-  smartlist_add(sl, &d);
-  smartlist_add(sl, &a);
-  smartlist_add(sl, &c);
-  smartlist_shuffle(sl);
-  test_eq(c, median_int(sl)); /* a,a,b,c,d,e,e */
+#define median() median_int(lst, n)
 
-  smartlist_clear(sl);
-  smartlist_add(sl, &y);
-  test_eq(y, median_time(sl)); /*y*/
-  smartlist_add(sl, &w);
-  test_eq(w, median_time(sl)); /*w,y*/
-  smartlist_add(sl, &x);
-  test_eq(x, median_time(sl)); /*w,x,y*/
-  smartlist_add(sl, &v);
-  test_eq(w, median_time(sl)); /*v,w,x,y*/
-  smartlist_add(sl, &z);
-  test_eq(x, median_time(sl)); /*v,w,x,y,z*/
-
-  smartlist_free(sl);
+  lst[n++] = 12;
+  test_eq(12, median()); /* 12 */
+  lst[n++] = 77;
+  //smartlist_shuffle(sl);
+  test_eq(12, median()); /* 12, 77 */
+  lst[n++] = 77;
+  //smartlist_shuffle(sl);
+  test_eq(77, median()); /* 12, 77, 77 */
+  lst[n++] = 24;
+  test_eq(24, median()); /* 12,24,77,77 */
+  lst[n++] = 60;
+  lst[n++] = 12;
+  lst[n++] = 25;
+  //smartlist_shuffle(sl);
+  test_eq(25, median()); /* 12,12,24,25,60,77,77 */
+#undef median
 }
 
 static void
@@ -3121,7 +3108,7 @@
   SUBENT(util, pqueue),
   SUBENT(util, mmap),
   SUBENT(util, threads),
-  SUBENT(util, dirvote_helpers),
+  SUBENT(util, order_functions),
   ENT(onion_handshake),
   ENT(dir_format),
   ENT(v3_networkstatus),



More information about the tor-commits mailing list