[tor-commits] [tor/master] Use a linear algorithm to subtract two nodelists.

asn at torproject.org asn at torproject.org
Tue Apr 30 16:18:52 UTC 2019


commit 650b94ebc157da01fcb34e08341ad1717c61f4fe
Author: Nick Mathewson <nickm at torproject.org>
Date:   Fri Apr 26 11:03:22 2019 -0400

    Use a linear algorithm to subtract two nodelists.
    
    The nodelist_idx for each node_t serves as a unique identifier for
    the node, so we can use a bitarray to hold all the excluded
    nodes, and then remove them from the smartlist.
    
    Previously use used smartlist_subtract(sl, excluded), which is
    O(len(sl)*len(excluded)).
    
    We can use this function in other places too, but this is the one
    that showed up on the profiles of 30291.
    
    Closes ticket 30307.
---
 changes/ticket30307                |  4 +++
 src/feature/nodelist/node_select.c | 74 ++++++++++++++++++++++++++++++--------
 src/test/test_entrynodes.c         |  1 +
 3 files changed, 65 insertions(+), 14 deletions(-)

diff --git a/changes/ticket30307 b/changes/ticket30307
new file mode 100644
index 000000000..abcacb608
--- /dev/null
+++ b/changes/ticket30307
@@ -0,0 +1,4 @@
+  o Major features (performance):
+    - Update our node selection algorithm to exclude nodes in linear time.
+      Previously, the algorithm was quadratic, which could slow down heavily
+      used onion services.  Closes ticket 30307.
diff --git a/src/feature/nodelist/node_select.c b/src/feature/nodelist/node_select.c
index 93ddb066d..719b4b1b2 100644
--- a/src/feature/nodelist/node_select.c
+++ b/src/feature/nodelist/node_select.c
@@ -30,6 +30,7 @@
 #include "feature/nodelist/routerset.h"
 #include "feature/relay/router.h"
 #include "feature/relay/routermode.h"
+#include "lib/container/bitarray.h"
 #include "lib/crypt_ops/crypto_rand.h"
 #include "lib/math/fp.h"
 
@@ -826,6 +827,58 @@ routerlist_add_node_and_family(smartlist_t *sl, const routerinfo_t *router)
   nodelist_add_node_and_family(sl, node);
 }
 
+/**
+ * Remove every node_t that appears in <b>excluded</b> from <b>sl</b>.
+ *
+ * Behaves like smartlist_subtract, but uses nodelist_idx values to deliver
+ * linear performance when smartlist_subtract would be quadratic.
+ **/
+static void
+nodelist_subtract(smartlist_t *sl, const smartlist_t *excluded)
+{
+  const smartlist_t *nodelist = nodelist_get_list();
+  const int nodelist_len = smartlist_len(nodelist);
+  bitarray_t *excluded_idx = bitarray_init_zero(nodelist_len);
+
+  /* We haven't used nodelist_idx in this way previously, so I'm going to be
+   * paranoid in this code, and check that nodelist_idx is correct for every
+   * node before we use it.  If we fail, we fall back to smartlist_subtract().
+   */
+
+  /* Set the excluded_idx bit corresponding to every excluded node...
+   */
+  SMARTLIST_FOREACH_BEGIN(excluded, const node_t *, node) {
+    const int idx = node->nodelist_idx;
+    if (BUG(idx < 0) || BUG(idx >= nodelist_len) ||
+        BUG(node != smartlist_get(nodelist, idx))) {
+      goto internal_error;
+    }
+    bitarray_set(excluded_idx, idx);
+  } SMARTLIST_FOREACH_END(node);
+
+  /* Then remove them from sl.
+   */
+  SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) {
+    const int idx = node->nodelist_idx;
+    if (BUG(idx < 0) || BUG(idx >= nodelist_len) ||
+        BUG(node != smartlist_get(nodelist, idx))) {
+      goto internal_error;
+    }
+    if (bitarray_is_set(excluded_idx, idx)) {
+      SMARTLIST_DEL_CURRENT(sl, node);
+    }
+  } SMARTLIST_FOREACH_END(node);
+
+  bitarray_free(excluded_idx);
+  return;
+
+ internal_error:
+  log_warn(LD_BUG, "Internal error prevented us from using the fast method "
+           "for subtracting nodelists. Falling back to the quadratic way.");
+  smartlist_subtract(sl, excluded);
+  bitarray_free(excluded_idx);
+}
+
 /** Return a random running node from the nodelist. Never
  * pick a node that is in
  * <b>excludedsmartlist</b>, or which matches <b>excludedset</b>,
@@ -860,6 +913,7 @@ router_choose_random_node(smartlist_t *excludedsmartlist,
   const int direct_conn = (flags & CRN_DIRECT_CONN) != 0;
   const int rendezvous_v3 = (flags & CRN_RENDEZVOUS_V3) != 0;
 
+  const smartlist_t *node_list = nodelist_get_list();
   smartlist_t *sl=smartlist_new(),
     *excludednodes=smartlist_new();
   const node_t *choice = NULL;
@@ -870,17 +924,17 @@ router_choose_random_node(smartlist_t *excludedsmartlist,
   rule = weight_for_exit ? WEIGHT_FOR_EXIT :
     (need_guard ? WEIGHT_FOR_GUARD : WEIGHT_FOR_MID);
 
-  SMARTLIST_FOREACH_BEGIN(nodelist_get_list(), node_t *, node) {
+  SMARTLIST_FOREACH_BEGIN(node_list, const node_t *, node) {
     if (node_allows_single_hop_exits(node)) {
       /* Exclude relays that allow single hop exit circuits. This is an
        * obsolete option since 0.2.9.2-alpha and done by default in
        * 0.3.1.0-alpha. */
-      smartlist_add(excludednodes, node);
+      smartlist_add(excludednodes, (node_t*)node);
     } else if (rendezvous_v3 &&
                !node_supports_v3_rendezvous_point(node)) {
       /* Exclude relays that do not support to rendezvous for a hidden service
        * version 3. */
-      smartlist_add(excludednodes, node);
+      smartlist_add(excludednodes, (node_t*)node);
     }
   } SMARTLIST_FOREACH_END(node);
 
@@ -897,19 +951,11 @@ router_choose_random_node(smartlist_t *excludedsmartlist,
            "We found %d running nodes.",
             smartlist_len(sl));
 
-  smartlist_subtract(sl,excludednodes);
-  log_debug(LD_CIRC,
-            "We removed %d excludednodes, leaving %d nodes.",
-            smartlist_len(excludednodes),
-            smartlist_len(sl));
-
   if (excludedsmartlist) {
-    smartlist_subtract(sl,excludedsmartlist);
-    log_debug(LD_CIRC,
-              "We removed %d excludedsmartlist, leaving %d nodes.",
-              smartlist_len(excludedsmartlist),
-              smartlist_len(sl));
+    smartlist_add_all(excludednodes, excludedsmartlist);
   }
+  nodelist_subtract(sl, excludednodes);
+
   if (excludedset) {
     routerset_subtract_nodes(sl,excludedset);
     log_debug(LD_CIRC,
diff --git a/src/test/test_entrynodes.c b/src/test/test_entrynodes.c
index bdf057bb5..c43b21c67 100644
--- a/src/test/test_entrynodes.c
+++ b/src/test/test_entrynodes.c
@@ -197,6 +197,7 @@ big_fake_network_setup(const struct testcase_t *testcase)
       n->md->exit_policy = parse_short_policy("accept 443");
     }
 
+    n->nodelist_idx = smartlist_len(big_fake_net_nodes);
     smartlist_add(big_fake_net_nodes, n);
   }
 





More information about the tor-commits mailing list