commit 42e3c04a7a5fb47a97766e54c72885c5c2bf23a9 Author: Mike Perry mikeperry-git@fscked.org Date: Thu Oct 25 17:43:10 2012 -0700
Bug 3443: Don't count ORconn setup in circuit build time.
Also, add a hack Roger suggested where we're more patient if no circuits are opened yet. --- changes/bug3443 | 11 +++++ src/or/circuitbuild.c | 14 +++++- src/or/circuitlist.c | 9 +++- src/or/circuitstats.c | 10 ++-- src/or/circuituse.c | 118 +++++++++++++++++++++++++++++++++++++++++-------- src/or/control.c | 4 +- src/or/entrynodes.c | 2 +- src/or/entrynodes.h | 1 + src/or/or.h | 21 +++++++- 9 files changed, 157 insertions(+), 33 deletions(-)
diff --git a/changes/bug3443 b/changes/bug3443 new file mode 100644 index 0000000..01896ff --- /dev/null +++ b/changes/bug3443 @@ -0,0 +1,11 @@ + o Minor bugfixes + - Alter circuit build timeout measurement to start at the point + where we begin the CREATE/CREATE_FAST step (as opposed to circuit + initialization). This should make our timeout measurements more + uniform. Previously, we were sometimes including ORconn setup time + in our circuit build time measurements. Fixes bug #3443. + + o Minor features + - If we have no circuits open, use a relaxed timeout (the 95-percentile + cutoff) until a circuit succeeds. This should allow Tor to succeed + building circuits if the network connection drastically changes. diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 1fb93bb..7301874 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -522,6 +522,18 @@ circuit_deliver_create_cell(circuit_t *circ, uint8_t cell_type, CELL_DIRECTION_OUT, 0);
if (CIRCUIT_IS_ORIGIN(circ)) { + /* Update began timestamp for circuits starting their first hop */ + if (TO_ORIGIN_CIRCUIT(circ)->cpath->state == CPATH_STATE_CLOSED) { + if (circ->n_chan->state != CHANNEL_STATE_OPEN) { + log_warn(LD_CIRC, + "Got first hop for a circuit without an opened channel. " + "State: %s.", channel_state_to_string(circ->n_chan->state)); + tor_fragile_assert(); + } + + tor_gettimeofday(&circ->timestamp_began); + } + /* mark it so it gets better rate limiting treatment. */ channel_timestamp_client(circ->n_chan); } @@ -672,7 +684,7 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) struct timeval end; long timediff; tor_gettimeofday(&end); - timediff = tv_mdiff(&circ->base_.timestamp_created, &end); + timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
/* * If the circuit build time is much greater than we would have cut diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c index abb8395..8f06c06 100644 --- a/src/or/circuitlist.c +++ b/src/or/circuitlist.c @@ -555,6 +555,11 @@ init_circuit_base(circuit_t *circ) { tor_gettimeofday(&circ->timestamp_created);
+ // Gets reset when we send CREATE_FAST. + // circuit_expire_building() expects these to be equal + // until the orconn is built. + circ->timestamp_began = circ->timestamp_created; + circ->package_window = circuit_initial_package_window(); circ->deliver_window = CIRCWINDOW_START;
@@ -777,7 +782,7 @@ circuit_dump_conn_details(int severity, "state %d (%s), born %ld:", conn_array_index, type, this_circid, other_circid, circ->state, circuit_state_to_string(circ->state), - (long)circ->timestamp_created.tv_sec); + (long)circ->timestamp_began.tv_sec); if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */ circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ)); } @@ -840,7 +845,7 @@ circuit_dump_chan_details(int severity, "state %d (%s), born %ld:", chan, type, this_circid, other_circid, circ->state, circuit_state_to_string(circ->state), - (long)circ->timestamp_created.tv_sec); + (long)circ->timestamp_began.tv_sec); if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */ circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ)); } diff --git a/src/or/circuitstats.c b/src/or/circuitstats.c index 6d529d1..4f12a6f 100644 --- a/src/or/circuitstats.c +++ b/src/or/circuitstats.c @@ -1469,11 +1469,6 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
max_time = circuit_build_times_max(cbt);
- /* Sometimes really fast guard nodes give us such a steep curve - * that this ends up being not that much greater than timeout_ms. - * Make it be at least 1 min to handle this case. */ - cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout()); - if (cbt->timeout_ms > max_time) { log_info(LD_CIRC, "Circuit build timeout of %dms is beyond the maximum build " @@ -1490,6 +1485,11 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt) cbt->close_ms = 2*max_time; }
+ /* Sometimes really fast guard nodes give us such a steep curve + * that this ends up being not that much greater than timeout_ms. + * Make it be at least 1 min to handle this case. */ + cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout()); + cbt->have_computed_timeout = 1; return 1; } diff --git a/src/or/circuituse.c b/src/or/circuituse.c index d3cde1d..a329afb 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -176,6 +176,13 @@ circuit_is_better(const origin_circuit_t *oa, const origin_circuit_t *ob, const uint8_t purpose = ENTRY_TO_CONN(conn)->purpose; int a_bits, b_bits;
+ /* If one of the circuits was allowed to live due to relaxing its timeout, + * it is definitely worse (it's probably a much slower path). */ + if (oa->relaxed_timeout && !ob->relaxed_timeout) + return 0; /* ob is better. It's not relaxed. */ + if (!oa->relaxed_timeout && ob->relaxed_timeout) + return 1; /* oa is better. It's not relaxed. */ + switch (purpose) { case CIRCUIT_PURPOSE_C_GENERAL: /* if it's used but less dirty it's best; @@ -187,7 +194,7 @@ circuit_is_better(const origin_circuit_t *oa, const origin_circuit_t *ob, return 1; } else { if (a->timestamp_dirty || - timercmp(&a->timestamp_created, &b->timestamp_created, >)) + timercmp(&a->timestamp_began, &b->timestamp_began, >)) return 1; if (ob->build_state->is_internal) /* XXX023 what the heck is this internal thing doing here. I @@ -279,7 +286,7 @@ circuit_get_best(const entry_connection_t *conn,
if (purpose == CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT && !must_be_open && circ->state != CIRCUIT_STATE_OPEN && - tv_mdiff(&now, &circ->timestamp_created) > circ_times.timeout_ms) { + tv_mdiff(&now, &circ->timestamp_began) > circ_times.timeout_ms) { intro_going_on_but_too_old = 1; continue; } @@ -365,8 +372,31 @@ circuit_expire_building(void) const or_options_t *options = get_options(); struct timeval now; cpath_build_state_t *build_state; + int any_opened_circs = 0;
tor_gettimeofday(&now); + + /* Check to see if we have any opened circuits. If we don't, + * we want to be more lenient with timeouts, in case the + * user has relocated and/or changed network connections. + * See bug #3443. */ + while (next_circ) { + if (!CIRCUIT_IS_ORIGIN(next_circ) || /* didn't originate here */ + next_circ->marked_for_close) /* don't mess with marked circs */ + continue; + + if (TO_ORIGIN_CIRCUIT(next_circ)->has_opened && + next_circ->state == CIRCUIT_STATE_OPEN && + TO_ORIGIN_CIRCUIT(next_circ)->build_state && + TO_ORIGIN_CIRCUIT(next_circ)->build_state->desired_path_len + == DEFAULT_ROUTE_LEN) { + any_opened_circs = 1; + break; + } + next_circ = next_circ->next; + } + next_circ = global_circuitlist; + #define SET_CUTOFF(target, msec) do { \ long ms = tor_lround(msec); \ struct timeval diff; \ @@ -391,8 +421,20 @@ circuit_expire_building(void) victim = next_circ; next_circ = next_circ->next; if (!CIRCUIT_IS_ORIGIN(victim) || /* didn't originate here */ - victim->marked_for_close) /* don't mess with marked circs */ + victim->marked_for_close) /* don't mess with marked circs */ + continue; + + /* If we haven't yet started the first hop, it means we don't have + * any orconns available, and thus have not started counting time yet + * for this circuit. See circuit_deliver_create_cell() and uses of + * timestamp_began. + * + * Continue to wait in this case. The ORConn should timeout + * independently and kill us then. + */ + if (TO_ORIGIN_CIRCUIT(victim)->cpath->state == CPATH_STATE_CLOSED) { continue; + }
build_state = TO_ORIGIN_CIRCUIT(victim)->build_state; if (build_state && build_state->onehop_tunnel) @@ -410,9 +452,40 @@ circuit_expire_building(void) if (TO_ORIGIN_CIRCUIT(victim)->hs_circ_has_timed_out) cutoff = hs_extremely_old_cutoff;
- if (timercmp(&victim->timestamp_created, &cutoff, >)) + if (timercmp(&victim->timestamp_began, &cutoff, >)) continue; /* it's still young, leave it alone */
+ if (!any_opened_circs) { + /* It's still young enough that we wouldn't close it, right? */ + if (timercmp(&victim->timestamp_began, &close_cutoff, >)) { + if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) { + int first_hop_succeeded = TO_ORIGIN_CIRCUIT(victim)->cpath->state + == CPATH_STATE_OPEN; + log_info(LD_CIRC, + "No circuits are opened. Relaxing timeout for " + "a circuit with channel state %s. %d guards are live.", + channel_state_to_string(victim->n_chan->state), + num_live_entry_guards()); + + /* We count the timeout here for CBT, because technically this + * was a timeout, and the timeout value needs to reset if we + * see enough of them. Note this means we also need to avoid + * double-counting below, too. */ + circuit_build_times_count_timeout(&circ_times, first_hop_succeeded); + TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout = 1; + } + continue; + } else { + log_notice(LD_CIRC, + "No circuits are opened. Relaxed timeout for " + "a circuit with channel state %s to %ldms. " + "However, it appears the circuit has timed out anyway. " + "%d guards are live. ", + channel_state_to_string(victim->n_chan->state), + (long)circ_times.close_ms, num_live_entry_guards()); + } + } + #if 0 /* some debug logs, to help track bugs */ if (victim->purpose >= CIRCUIT_PURPOSE_C_INTRODUCING && @@ -489,9 +562,12 @@ circuit_expire_building(void) /* Record this failure to check for too many timeouts * in a row. This function does not record a time value yet * (we do that later); it only counts the fact that we did - * have a timeout. */ - circuit_build_times_count_timeout(&circ_times, - first_hop_succeeded); + * have a timeout. We also want to avoid double-counting + * already "relaxed" circuits, which are counted above. */ + if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) { + circuit_build_times_count_timeout(&circ_times, + first_hop_succeeded); + } continue; }
@@ -500,16 +576,16 @@ circuit_expire_building(void) * it off at, we probably had a suspend event along this codepath, * and we should discard the value. */ - if (timercmp(&victim->timestamp_created, &extremely_old_cutoff, <)) { + if (timercmp(&victim->timestamp_began, &extremely_old_cutoff, <)) { log_notice(LD_CIRC, "Extremely large value for circuit build timeout: %lds. " "Assuming clock jump. Purpose %d (%s)", - (long)(now.tv_sec - victim->timestamp_created.tv_sec), + (long)(now.tv_sec - victim->timestamp_began.tv_sec), victim->purpose, circuit_purpose_to_string(victim->purpose)); } else if (circuit_build_times_count_close(&circ_times, first_hop_succeeded, - victim->timestamp_created.tv_sec)) { + victim->timestamp_began.tv_sec)) { circuit_build_times_set_timeout(&circ_times); } } @@ -791,7 +867,7 @@ circuit_build_needed_circs(time_t now) circ = circuit_get_youngest_clean_open(CIRCUIT_PURPOSE_C_GENERAL); if (get_options()->RunTesting && circ && - circ->timestamp_created.tv_sec + TESTING_CIRCUIT_INTERVAL < now) { + circ->timestamp_began.tv_sec + TESTING_CIRCUIT_INTERVAL < now) { log_fn(LOG_INFO,"Creating a new testing circuit."); circuit_launch(CIRCUIT_PURPOSE_C_GENERAL, 0); } @@ -911,7 +987,7 @@ circuit_expire_old_circuits_clientside(void) circ->purpose); circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED); } else if (!circ->timestamp_dirty && circ->state == CIRCUIT_STATE_OPEN) { - if (timercmp(&circ->timestamp_created, &cutoff, <)) { + if (timercmp(&circ->timestamp_began, &cutoff, <)) { if (circ->purpose == CIRCUIT_PURPOSE_C_GENERAL || circ->purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT || circ->purpose == CIRCUIT_PURPOSE_S_ESTABLISH_INTRO || @@ -921,7 +997,7 @@ circuit_expire_old_circuits_clientside(void) circ->purpose == CIRCUIT_PURPOSE_S_CONNECT_REND) { log_debug(LD_CIRC, "Closing circuit that has been unused for %ld msec.", - tv_mdiff(&circ->timestamp_created, &now)); + tv_mdiff(&circ->timestamp_began, &now)); circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED); } else if (!TO_ORIGIN_CIRCUIT(circ)->is_ancient) { /* Server-side rend joined circuits can end up really old, because @@ -935,7 +1011,7 @@ circuit_expire_old_circuits_clientside(void) "Ancient non-dirty circuit %d is still around after " "%ld milliseconds. Purpose: %d (%s)", TO_ORIGIN_CIRCUIT(circ)->global_identifier, - tv_mdiff(&circ->timestamp_created, &now), + tv_mdiff(&circ->timestamp_began, &now), circ->purpose, circuit_purpose_to_string(circ->purpose)); TO_ORIGIN_CIRCUIT(circ)->is_ancient = 1; @@ -1316,20 +1392,24 @@ circuit_launch_by_extend_info(uint8_t purpose, circ = circuit_find_to_cannibalize(purpose, extend_info, flags); if (circ) { uint8_t old_purpose = circ->base_.purpose; - struct timeval old_timestamp_created; + struct timeval old_timestamp_began;
log_info(LD_CIRC,"Cannibalizing circ '%s' for purpose %d (%s)", build_state_get_exit_nickname(circ->build_state), purpose, circuit_purpose_to_string(purpose));
circuit_change_purpose(TO_CIRCUIT(circ), purpose); - /* reset the birth date of this circ, else expire_building + /* Reset the start date of this circ, else expire_building * will see it and think it's been trying to build since it - * began. */ - tor_gettimeofday(&circ->base_.timestamp_created); + * began. + * + * Technically, the code should reset this when the + * create cell is finally sent, but we're close enough + * here. */ + tor_gettimeofday(&circ->base_.timestamp_began);
control_event_circuit_cannibalized(circ, old_purpose, - &old_timestamp_created); + &old_timestamp_began);
switch (purpose) { case CIRCUIT_PURPOSE_C_ESTABLISH_REND: diff --git a/src/or/control.c b/src/or/control.c index 0b7f04a..d935265 100644 --- a/src/or/control.c +++ b/src/or/control.c @@ -3578,9 +3578,9 @@ control_event_circuit_status_minor(origin_circuit_t *circ, /* event_tail can currently be up to 130 chars long */ const char *hs_state_str = circuit_purpose_to_controller_hs_state_string(purpose); - const struct timeval *old_timestamp_created = tv; + const struct timeval *old_timestamp_began = tv; char tbuf[ISO_TIME_USEC_LEN+1]; - format_iso_time_nospace_usec(tbuf, old_timestamp_created); + format_iso_time_nospace_usec(tbuf, old_timestamp_began);
tor_snprintf(event_tail, sizeof(event_tail), " OLD_PURPOSE=%s%s%s OLD_TIME_CREATED=%s", diff --git a/src/or/entrynodes.c b/src/or/entrynodes.c index edb26dc..8712241 100644 --- a/src/or/entrynodes.c +++ b/src/or/entrynodes.c @@ -219,7 +219,7 @@ entry_is_live(entry_guard_t *e, int need_uptime, int need_capacity, }
/** Return the number of entry guards that we think are usable. */ -static int +int num_live_entry_guards(void) { int n = 0; diff --git a/src/or/entrynodes.h b/src/or/entrynodes.h index 1a12cf4..4d031c3 100644 --- a/src/or/entrynodes.h +++ b/src/or/entrynodes.h @@ -52,6 +52,7 @@ typedef struct entry_guard_t { entry_guard_t *entry_guard_get_by_id_digest(const char *digest); void entry_guards_changed(void); const smartlist_t *get_entry_guards(void); +int num_live_entry_guards(void);
#endif
diff --git a/src/or/or.h b/src/or/or.h index 2ac9f6b..195cb2b 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2702,10 +2702,21 @@ typedef struct circuit_t { * length ONIONSKIN_CHALLENGE_LEN. */ char *n_chan_onionskin;
- /** When was this circuit created? We keep this timestamp with a higher - * resolution than most so that the circuit-build-time tracking code can - * get millisecond resolution. */ + /** When did circuit construction actually begin (ie send the + * CREATE cell or begin cannibalization). + * + * Note: This timer will get reset if we decide to cannibalize + * a circuit. It may also get reset during certain phases of hidden + * service circuit use. + * + * We keep this timestamp with a higher resolution than most so that the + * circuit-build-time tracking code can get millisecond resolution. + */ + struct timeval timestamp_began; + + /** This timestamp marks when the init_circuit_base constructor ran. */ struct timeval timestamp_created; + /** When the circuit was first used, or 0 if the circuit is clean. * * XXXX023 Note that some code will artifically adjust this value backward @@ -2815,6 +2826,10 @@ typedef struct origin_circuit_t { * service-side introduction circuits never have this flag set.) */ unsigned int hs_circ_has_timed_out : 1;
+ /** Set iff this circuit has been given a relaxed timeout because + * no circuits have opened. Used to prevent spamming logs. */ + unsigned int relaxed_timeout : 1; + /** Set iff this is a service-side rendezvous circuit for which a * new connection attempt has been launched. We consider launching * a new service-side rend circ to a client when the previous one