[or-cvs] New circuit building strategy: keep a list of ports that we...

Roger Dingledine arma at seul.org
Sun Dec 5 07:10:11 UTC 2004


Update of /home2/or/cvsroot/tor/src/or
In directory moria.mit.edu:/home2/arma/work/onion/cvs/tor/src/or

Modified Files:
	circuitbuild.c circuituse.c connection_edge.c directory.c or.h 
	rephist.c routerlist.c 
Log Message:
New circuit building strategy: keep a list of ports that we've used in
the past 6 hours, and always try to have 2 circuits open or on the way
that will handle each such port. (We can extend this to include addresses
if exit policies shift to require that.) Seed us with port 80 so web
browsers won't complain that Tor is "slow to start up".

This was necessary because our old circuit building strategy just involved
counting circuits, and as time went by we would build up a big pile of
circuits that had peculiar exit policies (e.g. only exit to 9001-9100)
which would take up space in the circuit pile but never get used.

Fix router_compare_addr_to_addr_policy: it was not treating a port of *
as always matching, so we were picking reject *:* nodes as exit nodes too.

If you haven't used a clean circuit in an hour, throw it away, just to
be on the safe side.

This means after 6 hours a totally unused Tor client will have no
circuits open.


Index: circuitbuild.c
===================================================================
RCS file: /home2/or/cvsroot/tor/src/or/circuitbuild.c,v
retrieving revision 1.68
retrieving revision 1.69
diff -u -d -r1.68 -r1.69
--- circuitbuild.c	4 Dec 2004 02:51:11 -0000	1.68
+++ circuitbuild.c	5 Dec 2004 07:10:08 -0000	1.69
@@ -787,6 +787,56 @@
   return routelen;
 }
 
+/** Fetch the list of predicted ports, turn it into a smartlist of
+ * strings, remove the ones that are already handled by an
+ * existing circuit, and return it.
+ */
+static smartlist_t *
+circuit_get_unhandled_ports(time_t now) {
+  char *pp = rep_hist_get_predicted_ports(now);
+  smartlist_t *needed_ports = smartlist_create();
+  smartlist_split_string(needed_ports, pp, " ", SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0);
+  tor_free(pp);
+
+  circuit_remove_handled_ports(needed_ports);
+  return needed_ports;
+}
+
+/** Return 1 if we already have circuits present or on the way for
+ * all anticipated ports. Return 0 if we should make more.
+ */
+int
+circuit_all_predicted_ports_handled(time_t now) {
+  int enough;
+  smartlist_t *sl = circuit_get_unhandled_ports(now);
+  enough = (smartlist_len(sl) == 0);
+  smartlist_free(sl);
+  return enough;
+}
+
+/** Return 1 if <b>router</b> can handle one or more of the ports in
+ * <b>needed_ports</b>, else return 0.
+ */
+static int
+router_handles_some_port(routerinfo_t *router, smartlist_t *needed_ports) {
+  int i;
+  uint16_t port;
+
+  for (i = 0; i < smartlist_len(needed_ports); ++i) {
+    port = *(uint16_t *)smartlist_get(needed_ports, i);
+    tor_assert(port);
+    if (router_compare_addr_to_addr_policy(0, port, router->exit_policy) !=
+          ADDR_POLICY_REJECTED)
+      return 1;
+  }
+  return 0;
+}
+
+/** How many circuits do we want simultaneously in-progress to handle
+ * a given stream?
+ */
+#define MIN_CIRCUITS_HANDLING_STREAM 2
+
 /** Return a pointer to a suitable router to be the exit node for the
  * general-purpose circuit we're about to build.
  *
@@ -820,7 +870,7 @@
     if (carray[i]->type == CONN_TYPE_AP &&
         carray[i]->state == AP_CONN_STATE_CIRCUIT_WAIT &&
         !carray[i]->marked_for_close &&
-        !circuit_stream_is_being_handled(carray[i]))
+        !circuit_stream_is_being_handled(carray[i], 0, MIN_CIRCUITS_HANDLING_STREAM))
       ++n_pending_connections;
   }
 //  log_fn(LOG_DEBUG, "Choosing exit node; %d connections are pending",
@@ -873,7 +923,7 @@
       if (carray[j]->type != CONN_TYPE_AP ||
           carray[j]->state != AP_CONN_STATE_CIRCUIT_WAIT ||
           carray[j]->marked_for_close ||
-          circuit_stream_is_being_handled(carray[j]))
+          circuit_stream_is_being_handled(carray[j], 0, MIN_CIRCUITS_HANDLING_STREAM))
         continue; /* Skip everything but APs in CIRCUIT_WAIT */
       if (connection_ap_can_use_exit(carray[j], router)) {
         ++n_supported[i];
@@ -920,18 +970,35 @@
     router = routerlist_sl_choose_by_bandwidth(sl);
   } else {
     /* Either there are no pending connections, or no routers even seem to
-     * possibly support any of them.  Choose a router at random. */
+     * possibly support any of them.  Choose a router at random that satisfies
+     * at least one predicted exit port. */
+
+    int try;
+    smartlist_t *needed_ports = circuit_get_unhandled_ports(time(NULL));
+
     if (best_support == -1) {
       log(LOG_WARN, "All routers are down or middleman -- choosing a doomed exit at random.");
     }
-    for (i = 0; i < smartlist_len(dir->routers); i++)
-      if (n_supported[i] != -1)
-        smartlist_add(sl, smartlist_get(dir->routers, i));
+    for (try = 0; try < 2; try++) {
+      /* try once to pick only from routers that satisfy a needed port,
+       * then if there are none, pick from any that support exiting. */
+      for (i = 0; i < smartlist_len(dir->routers); i++) {
+        router = smartlist_get(dir->routers, i);
+        if (n_supported[i] != -1 &&
+            (try || router_handles_some_port(router, needed_ports))) {
+          log_fn(LOG_DEBUG,"Try %d: '%s' is a possibility.", try, router->nickname);
+          smartlist_add(sl, router);
+        }
+      }
 
-    smartlist_subtract(sl,excludedexits);
-    if (options->StrictExitNodes || smartlist_overlap(sl,preferredexits))
-      smartlist_intersect(sl,preferredexits);
-    router = routerlist_sl_choose_by_bandwidth(sl);
+      smartlist_subtract(sl,excludedexits);
+      if (options->StrictExitNodes || smartlist_overlap(sl,preferredexits))
+        smartlist_intersect(sl,preferredexits);
+      router = routerlist_sl_choose_by_bandwidth(sl);
+      if (router)
+        break;
+    }
+    smartlist_free(needed_ports);
   }
 
   smartlist_free(preferredexits);

Index: circuituse.c
===================================================================
RCS file: /home2/or/cvsroot/tor/src/or/circuituse.c,v
retrieving revision 1.34
retrieving revision 1.35
diff -u -d -r1.34 -r1.35
--- circuituse.c	4 Dec 2004 03:26:35 -0000	1.34
+++ circuituse.c	5 Dec 2004 07:10:08 -0000	1.35
@@ -249,16 +249,34 @@
   }
 }
 
-/** How many circuits do we want simultaneously in-progress to handle
- * a given stream?
+/** Remove any elements in <b>needed_ports</b> that are handled by an
+ * open or in-progress circuit.
  */
-#define MIN_CIRCUITS_HANDLING_STREAM 2
+void
+circuit_remove_handled_ports(smartlist_t *needed_ports) {
+  int i;
+  uint16_t port;
+  char *portstring;
 
-/** Return 1 if at least MIN_CIRCUITS_HANDLING_STREAM non-open
- * general-purpose circuits will have an acceptable exit node for
- * conn. Else return 0.
+  for (i = 0; i < smartlist_len(needed_ports); ++i) {
+    portstring = smartlist_get(needed_ports, i);
+    port = *(uint16_t*)(portstring);
+    tor_assert(port);
+    if (circuit_stream_is_being_handled(NULL, port, 2)) {
+//      log_fn(LOG_DEBUG,"Port %d is already being handled; removing.", port);
+      smartlist_del(needed_ports, i--);
+      tor_free(portstring);
+    } else {
+      log_fn(LOG_DEBUG,"Port %d is not handled.", port);
+    }
+  }
+}
+
+/** Return 1 if at least <b>min</b> general-purpose circuits will have
+ * an acceptable exit node for conn if conn is defined, else for "*:port".
+ * Else return 0.
  */
-int circuit_stream_is_being_handled(connection_t *conn) {
+int circuit_stream_is_being_handled(connection_t *conn, uint16_t port, int min) {
   circuit_t *circ;
   routerinfo_t *exitrouter;
   int num=0;
@@ -266,15 +284,19 @@
 
   for (circ=global_circuitlist;circ;circ = circ->next) {
     if (CIRCUIT_IS_ORIGIN(circ) &&
-        circ->state != CIRCUIT_STATE_OPEN &&
         !circ->marked_for_close &&
         circ->purpose == CIRCUIT_PURPOSE_C_GENERAL &&
         (!circ->timestamp_dirty ||
          circ->timestamp_dirty + get_options()->NewCircuitPeriod < now)) {
       exitrouter = router_get_by_digest(circ->build_state->chosen_exit_digest);
-      if (exitrouter && connection_ap_can_use_exit(conn, exitrouter))
-        if (++num >= MIN_CIRCUITS_HANDLING_STREAM)
+      if (exitrouter &&
+          ((conn && connection_ap_can_use_exit(conn, exitrouter)) ||
+           (!conn &&
+            router_compare_addr_to_addr_policy(0, port, exitrouter->exit_policy) !=
+              ADDR_POLICY_REJECTED))) {
+        if (++num >= min)
           return 1;
+      }
     }
   }
   return 0;
@@ -316,6 +338,7 @@
     }
   }
 
+#if 0
 /** How many simultaneous in-progress general-purpose circuits do we
  * want to be building at once, if there are no open general-purpose
  * circuits?
@@ -327,6 +350,14 @@
                < CIRCUIT_MIN_BUILDING_GENERAL) {
     circuit_launch_by_identity(CIRCUIT_PURPOSE_C_GENERAL, NULL);
   }
+#endif
+
+  /* if we know of a port that's been requested recently and no
+   * circuit is currently available that can handle it, start one
+   * for that too. */
+  if (!circuit_all_predicted_ports_handled(now)) {
+    circuit_launch_by_identity(CIRCUIT_PURPOSE_C_GENERAL, NULL);
+  }
 
   /* XXX count idle rendezvous circs and build more */
 }
@@ -471,12 +502,19 @@
     } else if (!circ->timestamp_dirty && CIRCUIT_IS_ORIGIN(circ) &&
                circ->state == CIRCUIT_STATE_OPEN &&
                circ->purpose == CIRCUIT_PURPOSE_C_GENERAL) {
-      /* Also, gather a list of open unused general circuits that we created.
-       * Because we add elements to the front of global_circuitlist,
-       * the last elements of unused_open_circs will be the oldest
-       * ones.
-       */
-      smartlist_add(unused_open_circs, circ);
+#define CIRCUIT_UNUSED_CIRC_TIMEOUT 3600 /* an hour */
+      if (circ->timestamp_created + CIRCUIT_UNUSED_CIRC_TIMEOUT < now) {
+        log_fn(LOG_DEBUG,"Closing circuit that has been unused for %d seconds.",
+               (int)(now - circ->timestamp_created));
+        circuit_mark_for_close(circ);
+      } else {
+        /* Also, gather a list of open unused general circuits that we created.
+         * Because we add elements to the front of global_circuitlist,
+         * the last elements of unused_open_circs will be the oldest
+         * ones.
+         */
+        smartlist_add(unused_open_circs, circ);
+      }
     }
   }
   for (i = MAX_UNUSED_OPEN_CIRCUITS; i < smartlist_len(unused_open_circs); ++i) {

Index: connection_edge.c
===================================================================
RCS file: /home2/or/cvsroot/tor/src/or/connection_edge.c,v
retrieving revision 1.255
retrieving revision 1.256
diff -u -d -r1.255 -r1.256
--- connection_edge.c	4 Dec 2004 07:13:37 -0000	1.255
+++ connection_edge.c	5 Dec 2004 07:10:08 -0000	1.256
@@ -405,6 +405,7 @@
       return -1;
     }
     conn->state = AP_CONN_STATE_CIRCUIT_WAIT;
+    rep_hist_note_used_port(socks->port, time(NULL)); /* help predict this next time */
     return connection_ap_handshake_attach_circuit(conn);
   } else {
     /* it's a hidden-service request */
@@ -1016,7 +1017,7 @@
   } else {
     addr = client_dns_lookup_entry(conn->socks_request->address);
     if (router_compare_addr_to_addr_policy(addr, conn->socks_request->port,
-                                           exit->exit_policy) < 0)
+          exit->exit_policy) == ADDR_POLICY_REJECTED)
       return 0;
   }
   return 1;

Index: directory.c
===================================================================
RCS file: /home2/or/cvsroot/tor/src/or/directory.c,v
retrieving revision 1.178
retrieving revision 1.179
diff -u -d -r1.178 -r1.179
--- directory.c	4 Dec 2004 01:14:36 -0000	1.178
+++ directory.c	5 Dec 2004 07:10:08 -0000	1.179
@@ -582,7 +582,7 @@
 
   if (conn->purpose == DIR_PURPOSE_FETCH_DIR) {
     /* fetch/process the directory to learn about new routers. */
-    log_fn(LOG_INFO,"Received directory (size %d):\n%s", (int)body_len, body);
+    log_fn(LOG_INFO,"Received directory (size %d)", (int)body_len);
     if (status_code == 503 || body_len == 0) {
       log_fn(LOG_INFO,"Empty directory. Ignoring.");
       tor_free(body); tor_free(headers);
@@ -606,7 +606,7 @@
     running_routers_t *rrs;
     routerlist_t *rl;
     /* just update our list of running routers, if this list is new info */
-    log_fn(LOG_INFO,"Received running-routers list (size %d):\n%s", (int)body_len, body);
+    log_fn(LOG_INFO,"Received running-routers list (size %d)", (int)body_len);
     if (status_code != 200) {
       log_fn(LOG_WARN,"Received http status code %d from dirserver. Failing.",
              status_code);

Index: or.h
===================================================================
RCS file: /home2/or/cvsroot/tor/src/or/or.h,v
retrieving revision 1.504
retrieving revision 1.505
diff -u -d -r1.504 -r1.505
--- or.h	4 Dec 2004 03:26:35 -0000	1.504
+++ or.h	5 Dec 2004 07:10:08 -0000	1.505
@@ -1048,6 +1048,7 @@
 int circuit_finish_handshake(circuit_t *circ, char *reply);
 int circuit_truncated(circuit_t *circ, crypt_path_t *layer);
 int onionskin_answer(circuit_t *circ, unsigned char *payload, unsigned char *keys);
+int circuit_all_predicted_ports_handled(time_t now);
 void onion_append_to_cpath(crypt_path_t **head_ptr, crypt_path_t *new_hop);
 
 /********************************* circuitlist.c ***********************/
@@ -1082,7 +1083,8 @@
 /********************************* circuituse.c ************************/
 
 void circuit_expire_building(time_t now);
-int circuit_stream_is_being_handled(connection_t *conn);
+void circuit_remove_handled_ports(smartlist_t *needed_ports);
+int circuit_stream_is_being_handled(connection_t *conn, uint16_t port, int min);
 void circuit_build_needed_circs(time_t now);
 void circuit_detach_stream(circuit_t *circ, connection_t *conn);
 void circuit_about_to_close_connection(connection_t *conn);
@@ -1447,6 +1449,8 @@
 int rep_hist_bandwidth_assess(void);
 char *rep_hist_get_bandwidth_lines(void);
 void rep_history_clean(time_t before);
+void rep_hist_note_used_port(uint16_t port, time_t now);
+char *rep_hist_get_predicted_ports(time_t now);
 
 /********************************* rendclient.c ***************************/
 

Index: rephist.c
===================================================================
RCS file: /home2/or/cvsroot/tor/src/or/rephist.c,v
retrieving revision 1.48
retrieving revision 1.49
diff -u -d -r1.48 -r1.49
--- rephist.c	4 Dec 2004 08:56:59 -0000	1.48
+++ rephist.c	5 Dec 2004 07:10:08 -0000	1.49
@@ -11,6 +11,7 @@
 #include "or.h"
 
 static void bw_arrays_init(void);
+static void predicted_ports_init(void);
 
 /** History of an OR-\>OR link. */
 typedef struct link_history_t {
@@ -133,6 +134,7 @@
 {
   history_map = strmap_new();
   bw_arrays_init();
+  predicted_ports_init();
 }
 
 /** Remember that an attempt to connect to the OR with identity digest
@@ -617,3 +619,78 @@
   return buf;
 }
 
+/** A list of port numbers that have been used recently. */
+static smartlist_t *predicted_ports_list=NULL;
+/** The corresponding most recently used time for each port. */
+static smartlist_t *predicted_ports_times=NULL;
+
+static void add_predicted_port(uint16_t port, time_t now) {
+  uint16_t *tmp_port = tor_malloc(sizeof(uint16_t));
+  time_t *tmp_time = tor_malloc(sizeof(time_t));
+  *tmp_port = port;
+  *tmp_time = now;
+  smartlist_add(predicted_ports_list, tmp_port);
+  smartlist_add(predicted_ports_times, tmp_time);
+}
+
+static void predicted_ports_init(void) {
+  predicted_ports_list = smartlist_create();
+  predicted_ports_times = smartlist_create();
+  add_predicted_port(80, time(NULL)); /* add one to kickstart us */
+}
+
+/** Remember that <b>port</b> has been asked for as of time <b>now</b>.
+ * This is used for predicting what sorts of streams we'll make in the
+ * future and making circuits to anticipate that.
+ */
+void rep_hist_note_used_port(uint16_t port, time_t now) {
+  int i;
+  uint16_t *tmp_port;
+  time_t *tmp_time;
+
+  tor_assert(predicted_ports_list);
+  tor_assert(predicted_ports_times);
+
+  if(!port) /* record nothing */
+    return;
+
+  for (i = 0; i < smartlist_len(predicted_ports_list); ++i) {
+    tmp_port = smartlist_get(predicted_ports_list, i);
+    tmp_time = smartlist_get(predicted_ports_times, i);
+    if (*tmp_port == port) {
+      *tmp_time = now;
+      return;
+    }
+  }
+  /* it's not there yet; we need to add it */
+  add_predicted_port(port, now);
+}
+
+#define PREFERRED_PORTS_RELEVANCE_TIME (6*3600) /* 6 hours */
+
+/** Allocate and return a string of space-separated port numbers that
+ * are likely to be asked for in the near future.
+ */
+char *rep_hist_get_predicted_ports(time_t now) {
+  int i;
+  uint16_t *tmp_port;
+  time_t *tmp_time;
+
+  tor_assert(predicted_ports_list);
+  tor_assert(predicted_ports_times);
+
+  /* clean out obsolete entries */
+  for (i = 0; i < smartlist_len(predicted_ports_list); ++i) {
+    tmp_time = smartlist_get(predicted_ports_times, i);
+    if (*tmp_time + PREFERRED_PORTS_RELEVANCE_TIME < now) {
+      tmp_port = smartlist_get(predicted_ports_list, i);
+      smartlist_del(predicted_ports_list, i);
+      smartlist_del(predicted_ports_times, i);
+      tor_free(tmp_port);
+      tor_free(tmp_time);
+      i--;
+    }
+  }
+  return smartlist_join_strings(predicted_ports_list, " ", 0, NULL);
+}
+

Index: routerlist.c
===================================================================
RCS file: /home2/or/cvsroot/tor/src/or/routerlist.c,v
retrieving revision 1.198
retrieving revision 1.199
diff -u -d -r1.198 -r1.199
--- routerlist.c	4 Dec 2004 10:18:41 -0000	1.198
+++ routerlist.c	5 Dec 2004 07:10:08 -0000	1.199
@@ -936,10 +936,11 @@
 
 /** Decide whether a given addr:port is definitely accepted, definitely
  * rejected, or neither by a given policy.  If <b>addr</b> is 0, we
- * don't know the IP of the target address.
+ * don't know the IP of the target address. If <b>port</b> is 0, we
+ * don't know the port of the target address.
  *
- * Returns -1 for "rejected", 0 for "accepted", 1 for "maybe" (since IP is
- * unknown).
+ * Returns -1 for "rejected", 0 for "accepted", 1 for "maybe" (since IP or
+ * port is unknown).
  */
 int router_compare_addr_to_addr_policy(uint32_t addr, uint16_t port,
                                        addr_policy_t *policy)
@@ -948,7 +949,6 @@
   int maybe_accept = 0;
   int match = 0;
   int maybe = 0;
-  struct in_addr in;
   addr_policy_t *tmpe;
 
   for (tmpe=policy; tmpe; tmpe=tmpe->next) {
@@ -956,7 +956,8 @@
     maybe = 0;
     if (!addr) {
       /* Address is unknown. */
-      if (port >= tmpe->prt_min && port <= tmpe->prt_max) {
+      if ((port >= tmpe->prt_min && port <= tmpe->prt_max) ||
+           (!port && tmpe->prt_min<=1 && tmpe->prt_max>=65535)) {
         /* The port definitely matches. */
         if (tmpe->msk == 0) {
           match = 1;
@@ -965,10 +966,6 @@
         }
       } else if (!port) {
         /* The port maybe matches. */
-        /* XXX Nick: it looks port 0 only means something special for resolve
-         * commands, which can currently be handled by any exit node.
-         * Should we treat those specially elsewhere?
-         */
         maybe = 1;
       }
     } else {
@@ -989,9 +986,10 @@
         maybe_accept = 1;
     }
     if (match) {
-      in.s_addr = htonl(addr);
-      log_fn(LOG_DEBUG,"Address %s:%d matches policy '%s'",
-             inet_ntoa(in), port, tmpe->string);
+//      struct in_addr in;
+//      in.s_addr = htonl(addr);
+//      log_fn(LOG_DEBUG,"Address %s:%d matches policy '%s'",
+//             inet_ntoa(in), port, tmpe->string);
       if (tmpe->policy_type == ADDR_POLICY_ACCEPT) {
         /* If we already hit a clause that might trigger a 'reject', than we
          * can't be sure of this certain 'accept'.*/
@@ -1024,7 +1022,7 @@
 /** Return true iff <b>router</b> does not permit exit streams.
  */
 int router_exit_policy_rejects_all(routerinfo_t *router) {
-  return router_compare_addr_to_addr_policy(0, 1, router->exit_policy)
+  return router_compare_addr_to_addr_policy(0, 0, router->exit_policy)
     == ADDR_POLICY_REJECTED;
 }
 



More information about the tor-commits mailing list