commit 1125a4876b455d41b4c858cc97e8f8feef0fa8d0 Author: David Goulet dgoulet@ev0ke.net Date: Mon Jun 1 12:08:13 2015 -0400
Reuse intro points that failed but are still valid
There is a case where if the introduction circuit fails but the node is still in the consensus, we clean up the intro point and choose an other one. This commit fixes that by trying to reuse the existing intro point with a maximum value of retry.
A retry_nodes list is added to rend_services_introduce() and when we remove an invalid intro points that fits the use case mentionned before, we add the node to the retry list instead of removing it. Then, we retry on them before creating new ones.
This means that the requirement to remove an intro point changes from "if no intro circuit" to "if no intro circuit then if no node OR we've reached our maximum circuit creation count".
For now, the maximum retries is set to 3 which it completely arbitrary. It should also at some point be tied to the work done on detecting if our network is down or not.
Fixes #8239
Signed-off-by: David Goulet dgoulet@ev0ke.net --- src/or/or.h | 12 +++++++ src/or/rendservice.c | 85 +++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 79 insertions(+), 18 deletions(-)
diff --git a/src/or/or.h b/src/or/or.h index fc921a8..0deb4a7 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -4881,6 +4881,11 @@ typedef struct rend_encoded_v2_service_descriptor_t { * XXX023 Should this be configurable? */ #define INTRO_POINT_LIFETIME_MAX_SECONDS (24*60*60)
+/** The maximum number of circuit creation retry we do to an intro point + * before giving up. We try to reuse intro point that fails during their + * lifetime so this is a hard limit on the amount of time we do that. */ +#define MAX_INTRO_POINT_CIRCUIT_RETRIES 3 + /** Introduction point information. Used both in rend_service_t (on * the service side) and in rend_service_descriptor_t (on both the * client and service side). */ @@ -4930,6 +4935,13 @@ typedef struct rend_intro_point_t { * (start to) expire, or -1 if we haven't decided when this intro * point should expire. */ time_t time_to_expire; + + /** (Service side only) The amount of circuit creation we've made to this + * intro point. This is incremented every time we do a circuit relaunch on + * this object which is triggered when the circuit dies but the node is + * still in the consensus. After MAX_INTRO_POINT_CIRCUIT_RETRIES, we give + * up on it. */ + unsigned int circuit_retries; } rend_intro_point_t;
#define REND_PROTOCOL_VERSION_BITMASK_WIDTH 16 diff --git a/src/or/rendservice.c b/src/or/rendservice.c index 2d6a458..8c383a8 100644 --- a/src/or/rendservice.c +++ b/src/or/rendservice.c @@ -2751,6 +2751,14 @@ rend_service_intro_has_opened(origin_circuit_t *circuit) smartlist_len(service->expiring_nodes)) > (int)service->n_intro_points_wanted) { /* XXX023 remove cast */ const or_options_t *options = get_options(); + /* Remove the intro point associated with this circuit, it's being + * repurposed or closed thus cleanup memory. */ + rend_intro_point_t *intro = find_intro_point(circuit); + if (intro != NULL) { + smartlist_remove(service->intro_nodes, intro); + rend_intro_point_free(intro); + } + if (options->ExcludeNodes) { /* XXXX in some future version, we can test whether the transition is allowed or not given the actual nodes in the circuit. But for now, @@ -3344,15 +3352,19 @@ intro_point_should_expire_now(rend_intro_point_t *intro,
/** Iterate over intro points in the given service and remove the invalid * ones. For an intro point object to be considered invalid, the circuit - * needs to have disappeared. + * _and_ node need to have disappeared. * * If the intro point should expire, it's placed into the expiring_nodes * list of the service and removed from the active intro nodes list. * - * If <b>exclude_nodes</b> is not NULL, add the valid nodes to it. */ + * If <b>exclude_nodes</b> is not NULL, add the valid nodes to it. + * + * If <b>retry_nodes</b> is not NULL, add the valid node to it if the + * circuit disappeared but the node is still in the consensus. */ static void remove_invalid_intro_points(rend_service_t *service, - smartlist_t *exclude_nodes, time_t now) + smartlist_t *exclude_nodes, + smartlist_t *retry_nodes, time_t now) { tor_assert(service);
@@ -3365,6 +3377,12 @@ remove_invalid_intro_points(rend_service_t *service, origin_circuit_t *intro_circ = find_intro_circuit(intro, service->pk_digest);
+ /* Add the valid node to the exclusion list so we don't try to establish + * an introduction point to it again. */ + if (node && exclude_nodes) { + smartlist_add(exclude_nodes, (void*) node); + } + /* First, make sure we still have a valid circuit for this intro point. * If we dont, we'll give up on it and make a new one. */ if (intro_circ == NULL) { @@ -3372,10 +3390,23 @@ remove_invalid_intro_points(rend_service_t *service, " (circuit disappeared).", safe_str_client(extend_info_describe(intro->extend_info)), safe_str_client(service->service_id)); - rend_intro_point_free(intro); - SMARTLIST_DEL_CURRENT(service->intro_nodes, intro); - /* Useful, it indicates if the intro point has been freed. */ - intro = NULL; + /* Node is gone or we've reached our maximum circuit creationg retry + * count, clean up everything, we'll find a new one. */ + if (node == NULL || + intro->circuit_retries >= MAX_INTRO_POINT_CIRCUIT_RETRIES) { + rend_intro_point_free(intro); + SMARTLIST_DEL_CURRENT(service->intro_nodes, intro); + /* We've just killed the intro point, nothing left to do. */ + continue; + } + + /* The intro point is still alive so let's try to use it again because + * we have a published descriptor containing it. Keep the intro point + * in the intro_nodes list because it's still valid, we are rebuilding + * a circuit to it. */ + if (retry_nodes) { + smartlist_add(retry_nodes, intro); + } } /* else, the circuit is valid so in both cases, node being alive or not, * we leave the circuit and intro point object as is. Closing the @@ -3384,19 +3415,13 @@ remove_invalid_intro_points(rend_service_t *service,
/* Now, check if intro point should expire. If it does, queue it so * it can be cleaned up once it has been replaced properly. */ - if (intro != NULL && intro_point_should_expire_now(intro, now)) { + if (intro_point_should_expire_now(intro, now)) { log_info(LD_REND, "Expiring %s as intro point for %s.", safe_str_client(extend_info_describe(intro->extend_info)), safe_str_client(service->service_id)); smartlist_add(service->expiring_nodes, intro); SMARTLIST_DEL_CURRENT(service->intro_nodes, intro); } - - /* Add the valid node to the exclusion list so we don't try to - * establish an introduction point to it again. */ - if (node && exclude_nodes) { - smartlist_add(exclude_nodes, (void*)node); - } } SMARTLIST_FOREACH_END(intro); }
@@ -3447,14 +3472,19 @@ rend_services_introduce(void) /* List of nodes we need to _exclude_ when choosing a new node to * establish an intro point to. */ smartlist_t *exclude_nodes; + /* List of nodes we need to retry to build a circuit on them because the + * node is valid but circuit died. */ + smartlist_t *retry_nodes;
if (!have_completed_a_circuit()) return;
exclude_nodes = smartlist_new(); + retry_nodes = smartlist_new(); now = time(NULL);
SMARTLIST_FOREACH_BEGIN(rend_service_list, rend_service_t *, service) { + int r; /* Number of intro points we want to open and add to the intro nodes * list of the service. */ unsigned int n_intro_points_to_open; @@ -3464,6 +3494,7 @@ rend_services_introduce(void) /* Different service are allowed to have the same introduction point as * long as they are on different circuit thus why we clear this list. */ smartlist_clear(exclude_nodes); + smartlist_clear(retry_nodes);
/* This retry period is important here so we don't stress circuit * creation. */ @@ -3478,9 +3509,27 @@ rend_services_introduce(void) continue; }
- /* Cleanup the invalid intro points and save the node objects, if any, - * in the exclude_nodes list. */ - remove_invalid_intro_points(service, exclude_nodes, now); + /* Cleanup the invalid intro points and save the node objects, if apply, + * in the exclude_nodes and retry_nodes list. */ + remove_invalid_intro_points(service, exclude_nodes, retry_nodes, now); + + /* Let's try to rebuild circuit on the nodes we want to retry on. */ + SMARTLIST_FOREACH_BEGIN(retry_nodes, rend_intro_point_t *, intro) { + r = rend_service_launch_establish_intro(service, intro); + if (r < 0) { + log_warn(LD_REND, "Error launching circuit to node %s for service %s.", + safe_str_client(extend_info_describe(intro->extend_info)), + safe_str_client(service->service_id)); + /* Unable to launch a circuit to that intro point, remove it from + * the valid list so we can create a new one. */ + smartlist_remove(service->intro_nodes, intro); + rend_intro_point_free(intro); + continue; + } + intro->circuit_retries++; + } SMARTLIST_FOREACH_END(intro); + + /* Avoid mismatched signed comparaison below. */ intro_nodes_len = (unsigned int) smartlist_len(service->intro_nodes);
/* Quiescent state, no node expiring and we have more or the amount of @@ -3510,7 +3559,6 @@ rend_services_introduce(void) }
for (i = 0; i < (int) n_intro_points_to_open; i++) { - int r; const node_t *node; rend_intro_point_t *intro; router_crn_flags_t flags = CRN_NEED_UPTIME|CRN_NEED_DESC; @@ -3556,6 +3604,7 @@ rend_services_introduce(void) } } SMARTLIST_FOREACH_END(service); smartlist_free(exclude_nodes); + smartlist_free(retry_nodes); }
#define MIN_REND_INITIAL_POST_DELAY (30)