[or-cvs] r15091: Start noticing and reporting bootstrapping failures too. It (in tor/trunk: doc/spec/proposals/ideas src/or)

arma at seul.org arma at seul.org
Mon Jun 9 18:32:44 UTC 2008


Author: arma
Date: 2008-06-09 14:32:43 -0400 (Mon, 09 Jun 2008)
New Revision: 15091

Modified:
   tor/trunk/doc/spec/proposals/ideas/xxx-bootstrap-phases.txt
   tor/trunk/src/or/connection.c
   tor/trunk/src/or/connection_or.c
   tor/trunk/src/or/control.c
   tor/trunk/src/or/directory.c
   tor/trunk/src/or/or.h
Log:
Start noticing and reporting bootstrapping failures too. It looks like
we never bothered learning why OR conns fail, so next step is to add some
infrastructure for that.


Modified: tor/trunk/doc/spec/proposals/ideas/xxx-bootstrap-phases.txt
===================================================================
--- tor/trunk/doc/spec/proposals/ideas/xxx-bootstrap-phases.txt	2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/doc/spec/proposals/ideas/xxx-bootstrap-phases.txt	2008-06-09 18:32:43 UTC (rev 15091)
@@ -165,7 +165,7 @@
   tag=circuit_create "Establishing circuits"
 
   Once we've finished our TLS handshake with an entry guard, we will
-  set about rying to make some 3-hop circuits in case we need them soon.
+  set about trying to make some 3-hop circuits in case we need them soon.
 
   Phase 100:
   tag=done summary="Done"

Modified: tor/trunk/src/or/connection.c
===================================================================
--- tor/trunk/src/or/connection.c	2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/connection.c	2008-06-09 18:32:43 UTC (rev 15091)
@@ -497,9 +497,18 @@
         if (connection_or_nonopen_was_started_here(or_conn)) {
           rep_hist_note_connect_failed(or_conn->identity_digest, now);
           entry_guard_register_connect_status(or_conn->identity_digest,0,now);
-          router_set_status(or_conn->identity_digest, 0);
-          control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
-                  control_tls_error_to_reason(or_conn->tls_error));
+          if (!get_options()->HttpsProxy)
+            router_set_status(or_conn->identity_digest, 0);
+          if (conn->state == OR_CONN_STATE_CONNECTING) {
+            control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED, 0);
+            control_event_bootstrap_problem(
+              tor_socket_strerror(or_conn->socket_error), 0);
+          } else {
+            int reason = control_tls_error_to_reason(or_conn->tls_error);
+            control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
+                                         reason);
+            control_event_bootstrap_problem("foo", reason);
+          }
         }
         /* Inform any pending (not attached) circs that they should
          * give up. */
@@ -1842,13 +1851,15 @@
   before = buf_datalen(conn->inbuf);
   if (connection_read_to_buf(conn, &max_to_read) < 0) {
     /* There's a read error; kill the connection.*/
-    connection_close_immediate(conn); /* Don't flush; connection is dead. */
+    if (conn->type == CONN_TYPE_OR)
+      TO_OR_CONN(conn)->socket_error = tor_socket_errno(conn->s);
     if (CONN_IS_EDGE(conn)) {
       edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
       connection_edge_end_errno(edge_conn);
       if (edge_conn->socks_request) /* broken, don't send a socks reply back */
         edge_conn->socks_request->has_finished = 1;
     }
+    connection_close_immediate(conn); /* Don't flush; connection is dead. */
     connection_mark_for_close(conn);
     return -1;
   }
@@ -2145,14 +2156,11 @@
         log_info(LD_NET,"in-progress connect failed. Removing.");
         if (CONN_IS_EDGE(conn))
           connection_edge_end_errno(TO_EDGE_CONN(conn));
+        if (conn->type == CONN_TYPE_OR)
+          TO_OR_CONN(conn)->socket_error = e;
 
         connection_close_immediate(conn);
         connection_mark_for_close(conn);
-        /* it's safe to pass OPs to router_set_status(), since it just
-         * ignores unrecognized routers
-         */
-        if (conn->type == CONN_TYPE_OR && !get_options()->HttpsProxy)
-          router_set_status(TO_OR_CONN(conn)->identity_digest, 0);
         return -1;
       } else {
         return 0; /* no change, see if next time is better */

Modified: tor/trunk/src/or/connection_or.c
===================================================================
--- tor/trunk/src/or/connection_or.c	2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/connection_or.c	2008-06-09 18:32:43 UTC (rev 15091)
@@ -546,6 +546,8 @@
       }
       control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED,
               END_OR_CONN_REASON_TCP_REFUSED);
+      /* XXX connection_connect() can fail for all sorts of other reasons */
+      control_event_bootstrap_problem("foo", END_OR_CONN_REASON_TCP_REFUSED);
       connection_free(TO_CONN(conn));
       return NULL;
     case 0:
@@ -795,6 +797,7 @@
       router_set_status(conn->identity_digest, 0);
       control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED,
               END_OR_CONN_REASON_OR_IDENTITY);
+      control_event_bootstrap_problem("foo", END_OR_CONN_REASON_OR_IDENTITY);
       as_advertised = 0;
     }
     if (authdir_mode_tests_reachability(options)) {

Modified: tor/trunk/src/or/control.c
===================================================================
--- tor/trunk/src/or/control.c	2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/control.c	2008-06-09 18:32:43 UTC (rev 15091)
@@ -3825,7 +3825,7 @@
 
 /** Convert the name of a bootstrapping phase <b>s</b> into strings
  * <b>tag</b> and <b>summary</b> suitable for display by the controller. */
-static void
+static int
 bootstrap_status_to_string(bootstrap_status_t s, const char **tag,
                            const char **summary)
 {
@@ -3887,48 +3887,94 @@
       *summary = "Done";
       break;
     default:
-      log_warn(LD_BUG, "Unrecognized bootstrap status code %d", s);
+//      log_warn(LD_BUG, "Unrecognized bootstrap status code %d", s);
       *tag = *summary = "unknown";
+      return -1;
   }
+  return 0;
 }
 
+/** What percentage through the bootstrap process are we? We remember
+ * this so we can avoid sending redundant bootstrap status events, and
+ * so we can guess context for the bootstrap messages which are
+ * ambiguous. */
+static int bootstrap_percent = 0;
+
+/** How many problems have we had getting to the next bootstrapping phase?
+ * These include failure to establish a connection to a Tor relay,
+ * failures to finish the TLS handshake, failures to validate the
+ * consensus document, etc. */
+static int bootstrap_problems = 0;
+
+/* We only tell the controller once we've hit a threshold of problems
+ * for the current phase. */
+#define BOOTSTRAP_PROBLEM_THRESHOLD 10
+
 /** Called when Tor has made progress at bootstrapping its directory
- * information and initial circuits. <b>status</b> is the new status,
- * that is, what task we will be doing next. <b>percent</b> is zero if
- * we just started this task, else it represents progress on the task.
- */
-int
-control_event_bootstrap(bootstrap_status_t status, int percent)
+ * information and initial circuits.
+ *
+ * <b>status</b> is the new status, that is, what task we will be doing
+ * next. <b>progress</b> is zero if we just started this task, else it
+ * represents progress on the task. */
+void
+control_event_bootstrap(bootstrap_status_t status, int progress)
 {
-  static int last_percent = 0;
   const char *tag, *summary;
 
-  if (last_percent == 100)
-    return 0; /* already bootstrapped; nothing to be done here. */
+  if (bootstrap_percent == 100)
+    return; /* already bootstrapped; nothing to be done here. */
 
   /* special case for handshaking status, since our TLS handshaking code
    * can't distinguish what the connection is going to be for. */
   if (status == BOOTSTRAP_STATUS_HANDSHAKE) {
-    if (last_percent < BOOTSTRAP_STATUS_CONN_OR) {
+    if (bootstrap_percent < BOOTSTRAP_STATUS_CONN_OR) {
       status =  BOOTSTRAP_STATUS_HANDSHAKE_DIR;
     } else {
       status = BOOTSTRAP_STATUS_HANDSHAKE_OR;
     }
   }
 
-  if (status > last_percent || (percent && percent > last_percent)) {
+  if (status > bootstrap_percent ||
+      (progress && progress > bootstrap_percent)) {
     bootstrap_status_to_string(status, &tag, &summary);
     log_notice(LD_CONTROL, "Bootstrapped %d%%: %s.",
-               percent ? percent : status, summary);
+               progress ? progress : status, summary);
     control_event_client_status(LOG_NOTICE,
         "BOOTSTRAP PROGRESS=%d TAG=%s SUMMARY=\"%s\"",
-        percent ? percent : status, tag, summary);
+        progress ? progress : status, tag, summary);
+    if (status > bootstrap_percent) {
+      bootstrap_percent = status; /* new milestone reached */
+    }
+    if (progress > bootstrap_percent) {
+      /* incremental progress within a milestone */
+      bootstrap_percent = progress;
+    }
+    bootstrap_problems = 0; /* Progress! Reset our problem counter. */
   }
+}
 
-  if (status > last_percent) /* new milestone reached */
-    last_percent = status ;
-  if (percent > last_percent) /* incremental progress within a milestone */
-    last_percent = percent;
-  return 0;
+/* Called when Tor has failed to make bootstrapping progress in a way
+ * that indicates a problem. <b>warn</b> gives a hint as to why, and
+ * <b>reason</b> provides an "or_conn_end_reason" tag.
+ */
+void
+control_event_bootstrap_problem(const char *warn, int reason)
+{
+  int status = bootstrap_percent;
+  const char *tag, *summary;
+
+//  if (++bootstrap_problems != BOOTSTRAP_PROBLEM_THRESHOLD)
+//    return; /* no worries yet */
+
+  while (bootstrap_status_to_string(status, &tag, &summary) < 0)
+    status--; /* find a recognized status string based on current progress */
+
+  log_warn(LD_CONTROL, "Problem bootstrapping. Stuck at %d%%: %s. (%s; %s)",
+           status, summary, warn,
+           or_conn_end_reason_to_string(reason));
+  control_event_client_status(LOG_WARN,
+      "BOOTSTRAP PROGRESS=%d TAG=%s SUMMARY=\"%s\" WARNING=\"%s\" %s",
+      bootstrap_percent, tag, summary, warn,
+      or_conn_end_reason_to_string(reason));
 }
 

Modified: tor/trunk/src/or/directory.c
===================================================================
--- tor/trunk/src/or/directory.c	2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/directory.c	2008-06-09 18:32:43 UTC (rev 15091)
@@ -2186,7 +2186,8 @@
     case DIR_PURPOSE_FETCH_CONSENSUS:     kind = "dl/consensus"; break;
     case DIR_PURPOSE_FETCH_CERTIFICATE:   kind = "dl/cert"; break;
     case DIR_PURPOSE_FETCH_STATUS_VOTE:   kind = "dl/vote"; break;
-    case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: kind = "dl/detached_sig"; break;
+    case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: kind = "dl/detached_sig";
+         break;
     case DIR_PURPOSE_FETCH_SERVERDESC:    kind = "dl/server"; break;
     case DIR_PURPOSE_FETCH_EXTRAINFO:     kind = "dl/extra"; break;
     case DIR_PURPOSE_UPLOAD_DIR:          kind = "dl/ul-dir"; break;

Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h	2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/or.h	2008-06-09 18:32:43 UTC (rev 15091)
@@ -905,6 +905,9 @@
 
   tor_tls_t *tls; /**< TLS connection state. */
   int tls_error; /**< Last tor_tls error code. */
+  /* XXX either merge this with tls_error, or do all our activity right
+   * when we compute this value so we don't have to store it. */
+  int socket_error; /**< If conn dies, remember why. */
   /** When we last used this conn for any client traffic. If not
    * recent, we can rate limit it further. */
   time_t client_used;
@@ -3035,7 +3038,8 @@
   BOOTSTRAP_STATUS_DONE=100
 } bootstrap_status_t;
 
-int control_event_bootstrap(bootstrap_status_t status, int percent);
+void control_event_bootstrap(bootstrap_status_t status, int progress);
+void control_event_bootstrap_problem(const char *warn, int reason);
 
 #ifdef CONTROL_PRIVATE
 /* Used only by control.c and test.c */



More information about the tor-commits mailing list