[or-cvs] Replace balanced trees with hash tables: this should make s...

Nick Mathewson nickm at seul.org
Wed Nov 23 04:18:47 UTC 2005


Update of /home/or/cvsroot/tor/src/or
In directory moria:/tmp/cvs-serv5279/src/or

Modified Files:
	circuitlist.c connection_edge.c dns.c rephist.c test.c 
Log Message:
Replace balanced trees with hash tables: this should make stuff significantly faster.

Index: circuitlist.c
===================================================================
RCS file: /home/or/cvsroot/tor/src/or/circuitlist.c,v
retrieving revision 1.72
retrieving revision 1.73
diff -u -d -r1.72 -r1.73
--- circuitlist.c	19 Nov 2005 01:56:58 -0000	1.72
+++ circuitlist.c	23 Nov 2005 04:18:45 -0000	1.73
@@ -12,10 +12,7 @@
 
 #include "or.h"
 
-/* Define RB_AUGMENT to avoid warnings about if statements with emtpy bodies.
- */
-#define RB_AUGMENT(x) do{}while(0)
-#include "tree.h"
+#include "../common/ht.h"
 
 /********* START VARIABLES **********/
 
@@ -31,30 +28,34 @@
 /** A map from OR connection and circuit ID to circuit.  (Lookup performance is
  * very important here, since we need to do it every time a cell arrives.) */
 typedef struct orconn_circid_circuit_map_t {
-  RB_ENTRY(orconn_circid_circuit_map_t) node;
+  HT_ENTRY(orconn_circid_circuit_map_t) node;
   connection_t *or_conn;
   uint16_t circ_id;
   circuit_t *circuit;
 } orconn_circid_circuit_map_t;
 
-/** Helper for RB tree: compare the OR connection and circuit ID for a and b,
+/** Helper for hash tables: compare the OR connection and circuit ID for a and b,
  * and return less than, equal to, or greater than zero appropriately.
  */
 static INLINE int
-compare_orconn_circid_entries(orconn_circid_circuit_map_t *a,
-                              orconn_circid_circuit_map_t *b)
+_orconn_circid_entries_eq(orconn_circid_circuit_map_t *a,
+                          orconn_circid_circuit_map_t *b)
 {
-  if (a->or_conn < b->or_conn)
-    return -1;
-  else if (a->or_conn > b->or_conn)
-    return 1;
-  else
-    return ((int)b->circ_id) - ((int)a->circ_id);
-};
+  return a->or_conn == b->or_conn && a->circ_id == b->circ_id;
+}
 
-static RB_HEAD(orconn_circid_tree, orconn_circid_circuit_map_t) orconn_circid_circuit_map = RB_INITIALIZER(orconn_circid_circuit_map);
-RB_PROTOTYPE(orconn_circid_tree, orconn_circid_circuit_map_t, node, compare_orconn_circid_entries);
-RB_GENERATE(orconn_circid_tree, orconn_circid_circuit_map_t, node, compare_orconn_circid_entries);
+static INLINE unsigned int
+_orconn_circid_entry_hash(orconn_circid_circuit_map_t *a)
+{
+  return (((unsigned)a->circ_id)<<16) ^ (unsigned)(uintptr_t)(a->or_conn);
+}
+
+static HT_HEAD(orconn_circid_tree, orconn_circid_circuit_map_t) orconn_circid_circuit_map = HT_INITIALIZER();
+HT_PROTOTYPE(orconn_circid_tree, orconn_circid_circuit_map_t, node,
+             _orconn_circid_entry_hash, _orconn_circid_entries_eq);
+HT_GENERATE(orconn_circid_tree, orconn_circid_circuit_map_t, node,
+            _orconn_circid_entry_hash, _orconn_circid_entries_eq, 0.6,
+            malloc, realloc, free);
 
 /** The most recently returned entry from circuit_get_by_circid_orconn;
  * used to improve performance when many cells arrive in a row from the
@@ -102,11 +103,10 @@
   if (old_conn) { /* we may need to remove it from the conn-circid map */
     search.circ_id = old_id;
     search.or_conn = old_conn;
-    found = RB_FIND(orconn_circid_tree, &orconn_circid_circuit_map, &search);
+    found = HT_REMOVE(orconn_circid_tree, &orconn_circid_circuit_map, &search);
     if (found) {
-      RB_REMOVE(orconn_circid_tree, &orconn_circid_circuit_map, found);
+      tor_free(found);
     }
-    tor_free(found);
   }
 
   if (conn == NULL)
@@ -115,7 +115,7 @@
   /* now add the new one to the conn-circid map */
   search.circ_id = id;
   search.or_conn = conn;
-  found = RB_FIND(orconn_circid_tree, &orconn_circid_circuit_map, &search);
+  found = HT_FIND(orconn_circid_tree, &orconn_circid_circuit_map, &search);
   if (found) {
     found->circuit = circ;
   } else {
@@ -123,7 +123,7 @@
     found->circ_id = id;
     found->or_conn = conn;
     found->circuit = circ;
-    RB_INSERT(orconn_circid_tree, &orconn_circid_circuit_map, found);
+    HT_INSERT(orconn_circid_tree, &orconn_circid_circuit_map, found);
   }
 }
 
@@ -358,7 +358,7 @@
   } else {
     search.circ_id = circ_id;
     search.or_conn = conn;
-    found = RB_FIND(orconn_circid_tree, &orconn_circid_circuit_map, &search);
+    found = HT_FIND(orconn_circid_tree, &orconn_circid_circuit_map, &search);
     _last_circid_orconn_ent = found;
   }
   if (found && found->circuit)

Index: connection_edge.c
===================================================================
RCS file: /home/or/cvsroot/tor/src/or/connection_edge.c,v
retrieving revision 1.362
retrieving revision 1.363
diff -u -d -r1.362 -r1.363
--- connection_edge.c	16 Nov 2005 23:37:35 -0000	1.362
+++ connection_edge.c	23 Nov 2005 04:18:45 -0000	1.363
@@ -890,8 +890,8 @@
      val = _val;
      if (val->expires >= min_expires && val->expires <= max_expires) {
        if (!sl) {
-         addressmap_ent_remove(key, val);
          iter = strmap_iter_next_rmv(addressmap,iter);
+         addressmap_ent_remove(key, val);
          continue;
        } else if (val->new_address) {
          size_t len = strlen(key)+strlen(val->new_address)+2;

Index: dns.c
===================================================================
RCS file: /home/or/cvsroot/tor/src/or/dns.c,v
retrieving revision 1.173
retrieving revision 1.174
diff -u -d -r1.173 -r1.174
--- dns.c	25 Oct 2005 19:01:48 -0000	1.173
+++ dns.c	23 Nov 2005 04:18:45 -0000	1.174
@@ -18,7 +18,7 @@
  */
 
 #include "or.h"
-#include "tree.h"
+#include "../common/ht.h"
 
 /** Longest hostname we're willing to resolve. */
 #define MAX_ADDRESSLEN 256
@@ -55,7 +55,7 @@
  * list from oldest to newest.
  */
 typedef struct cached_resolve_t {
-  SPLAY_ENTRY(cached_resolve_t) node;
+  HT_ENTRY(cached_resolve_t) node;
   char address[MAX_ADDRESSLEN]; /**< The hostname to be resolved. */
   uint32_t addr; /**< IPv4 addr for <b>address</b>. */
   char state; /**< 0 is pending; 1 means answer is valid; 2 means resolve failed. */
@@ -77,26 +77,33 @@
 static void send_resolved_cell(connection_t *conn, uint8_t answer_type);
 
 /** Splay tree of cached_resolve objects. */
-static SPLAY_HEAD(cache_tree, cached_resolve_t) cache_root;
+static HT_HEAD(cache_tree, cached_resolve_t) cache_root;
 
 /** Function to compare hashed resolves on their addresses; used to
  * implement splay trees. */
-static int
-compare_cached_resolves(cached_resolve_t *a,
-                        cached_resolve_t *b)
+static INLINE int
+cached_resolves_eq(cached_resolve_t *a, cached_resolve_t *b)
 {
   /* make this smarter one day? */
-  return strncmp(a->address, b->address, MAX_ADDRESSLEN);
+  return !strncmp(a->address, b->address, MAX_ADDRESSLEN);
 }
 
-SPLAY_PROTOTYPE(cache_tree, cached_resolve_t, node, compare_cached_resolves);
-SPLAY_GENERATE(cache_tree, cached_resolve_t, node, compare_cached_resolves);
+static INLINE unsigned int
+cached_resolve_hash(cached_resolve_t *a)
+{
+  return ht_string_hash(a->address);
+}
+
+HT_PROTOTYPE(cache_tree, cached_resolve_t, node, cached_resolve_hash,
+             cached_resolves_eq);
+HT_GENERATE(cache_tree, cached_resolve_t, node, cached_resolve_hash,
+            cached_resolves_eq, 0.6, malloc, realloc, free);
 
 /** Initialize the DNS cache. */
 static void
 init_cache_tree(void)
 {
-  SPLAY_INIT(&cache_root);
+  HT_INIT(&cache_root);
 }
 
 /** Initialize the DNS subsystem; called by the OR process. */
@@ -123,12 +130,13 @@
 void
 dns_free_all(void)
 {
-  cached_resolve_t *ptr, *next;
-  for (ptr = SPLAY_MIN(cache_tree, &cache_root); ptr != NULL; ptr = next) {
-    next = SPLAY_NEXT(cache_tree, &cache_root, ptr);
-    SPLAY_REMOVE(cache_tree, &cache_root, ptr);
-    _free_cached_resolve(ptr);
+  cached_resolve_t **ptr, **next, *item;
+  for (ptr = HT_START(cache_tree, &cache_root); ptr != NULL; ptr = next) {
+    item = *ptr;
+    next = HT_NEXT_RMV(cache_tree, &cache_root, ptr);
+    _free_cached_resolve(item);
   }
+  HT_CLEAR(cache_tree, &cache_root);
 }
 
 /** Linked list of resolved addresses, oldest to newest. */
@@ -174,7 +182,7 @@
     oldest_cached_resolve = resolve->next;
     if (!oldest_cached_resolve) /* if there are no more, */
       newest_cached_resolve = NULL; /* then make sure the list's tail knows that too */
-    SPLAY_REMOVE(cache_tree, &cache_root, resolve);
+    HT_REMOVE(cache_tree, &cache_root, resolve);
     tor_free(resolve);
   }
 }
@@ -230,7 +238,7 @@
   }
   newest_cached_resolve = r;
 
-  SPLAY_INSERT(cache_tree, &cache_root, r);
+  HT_INSERT(cache_tree, &cache_root, r);
 }
 
 /** See if we have a cache entry for <b>exitconn</b>-\>address. if so,
@@ -273,7 +281,7 @@
 
   /* now check the tree to see if 'address' is already there. */
   strlcpy(search.address, exitconn->address, sizeof(search.address));
-  resolve = SPLAY_FIND(cache_tree, &cache_root, &search);
+  resolve = HT_FIND(cache_tree, &cache_root, &search);
   if (resolve) { /* already there */
     switch (resolve->state) {
       case CACHE_STATE_PENDING:
@@ -383,7 +391,7 @@
 
   strlcpy(search.address, conn->address, sizeof(search.address));
 
-  resolve = SPLAY_FIND(cache_tree, &cache_root, &search);
+  resolve = HT_FIND(cache_tree, &cache_root, &search);
   if (!resolve) {
     /* XXXX RD This *is* a bug, right? -NM */
     notice(LD_BUG,"Address '%s' is not pending. Dropping.", safe_str(conn->address));
@@ -422,10 +430,10 @@
 assert_connection_edge_not_dns_pending(connection_t *conn)
 {
   pending_connection_t *pend;
-  cached_resolve_t *resolve;
+  cached_resolve_t **resolve;
 
-  SPLAY_FOREACH(resolve, cache_tree, &cache_root) {
-    for (pend = resolve->pending_connections;
+  HT_FOREACH(resolve, cache_tree, &cache_root) {
+    for (pend = (*resolve)->pending_connections;
          pend;
          pend = pend->next) {
       tor_assert(pend->conn != conn);
@@ -439,10 +447,10 @@
 assert_all_pending_dns_resolves_ok(void)
 {
   pending_connection_t *pend;
-  cached_resolve_t *resolve;
+  cached_resolve_t **resolve;
 
-  SPLAY_FOREACH(resolve, cache_tree, &cache_root) {
-    for (pend = resolve->pending_connections;
+  HT_FOREACH(resolve, cache_tree, &cache_root) {
+    for (pend = (*resolve)->pending_connections;
          pend;
          pend = pend->next) {
       assert_connection_ok(pend->conn, 0);
@@ -467,7 +475,7 @@
 
   strlcpy(search.address, address, sizeof(search.address));
 
-  resolve = SPLAY_FIND(cache_tree, &cache_root, &search);
+  resolve = HT_FIND(cache_tree, &cache_root, &search);
   if (!resolve) {
     /* XXXX RD This *is* a bug, right? -NM */
     notice(LD_BUG,"Address '%s' is not pending. Dropping.", safe_str(address));
@@ -529,7 +537,7 @@
   }
 
   /* remove resolve from the tree */
-  SPLAY_REMOVE(cache_tree, &cache_root, resolve);
+  HT_REMOVE(cache_tree, &cache_root, resolve);
 
   tor_free(resolve);
 }
@@ -552,7 +560,7 @@
 
   strlcpy(search.address, address, sizeof(search.address));
 
-  resolve = SPLAY_FIND(cache_tree, &cache_root, &search);
+  resolve = HT_FIND(cache_tree, &cache_root, &search);
   if (!resolve) {
     info(LD_EXIT,"Resolved unasked address '%s'; caching anyway.",
          safe_str(address));

Index: rephist.c
===================================================================
RCS file: /home/or/cvsroot/tor/src/or/rephist.c,v
retrieving revision 1.70
retrieving revision 1.71
diff -u -d -r1.70 -r1.71
--- rephist.c	25 Oct 2005 18:01:01 -0000	1.70
+++ rephist.c	23 Nov 2005 04:18:45 -0000	1.71
@@ -358,8 +358,8 @@
     digestmap_iter_get(orhist_it, &d1, &or_history_p);
     or_history = or_history_p;
     if (or_history->changed < before) {
-      free_or_history(or_history);
       orhist_it = digestmap_iter_next_rmv(history_map, orhist_it);
+      free_or_history(or_history);
       continue;
     }
     for (lhist_it = digestmap_iter_init(or_history->link_history_map);
@@ -367,9 +367,9 @@
       digestmap_iter_get(lhist_it, &d2, &link_history_p);
       link_history = link_history_p;
       if (link_history->changed < before) {
+        lhist_it = digestmap_iter_next_rmv(or_history->link_history_map,lhist_it);
         rephist_total_alloc -= sizeof(link_history_t);
         tor_free(link_history);
-        lhist_it = digestmap_iter_next_rmv(or_history->link_history_map,lhist_it);
         continue;
       }
       lhist_it = digestmap_iter_next(or_history->link_history_map,lhist_it);

Index: test.c
===================================================================
RCS file: /home/or/cvsroot/tor/src/or/test.c,v
retrieving revision 1.211
retrieving revision 1.212
diff -u -d -r1.211 -r1.212
--- test.c	5 Nov 2005 20:15:27 -0000	1.211
+++ test.c	23 Nov 2005 04:18:45 -0000	1.212
@@ -972,19 +972,6 @@
   tor_free(buf1);
 }
 
-static void *
-_squareAndRemoveK4(const char *key, void*val, void *data)
-{
-  int *ip = (int*)data;
-  intptr_t v;
-  if (strcmp(key,"K4") == 0) {
-    ++(*ip);
-    return NULL;
-  }
-  v = (intptr_t)val;
-  return (void*)(v*v);
-}
-
 static void
 test_strmap(void)
 {
@@ -1016,13 +1003,7 @@
   strmap_set(map, "K5", (void*)104);
   strmap_set(map, "K6", (void*)105);
 
-  count = 0;
-  strmap_foreach(map, _squareAndRemoveK4, &count);
-  test_eq(count, 1);
-  test_eq_ptr(strmap_get(map, "K4"), NULL);
-  test_eq_ptr(strmap_get(map, "K1"), (void*)10000);
-  test_eq_ptr(strmap_get(map, "K6"), (void*)11025);
-
+#if 0
   iter = strmap_iter_init(map);
   strmap_iter_get(iter,&k,&v);
   test_streq(k, "K1");
@@ -1045,6 +1026,8 @@
   /* Make sure we removed K2, but not the others. */
   test_eq_ptr(strmap_get(map, "K2"), NULL);
   test_eq_ptr(strmap_get(map, "K5"), (void*)10816);
+#endif
+
 
   /* Clean up after ourselves. */
   strmap_free(map, NULL);



More information about the tor-commits mailing list