This is an automated email from the git hooks/post-receive script.
dgoulet pushed a change to branch main in repository tor.
from 7747469dbe doc: Minor changes to the Releasing doc new 7ce3edaee0 rephist: Introduce a fraction and period for overload onionskin new b4890e7534 test: Unit tests for overload onionskin ntor new 687df93a75 Merge branch 'maint-0.4.6'
The 3 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
Summary of changes:
This is an automated email from the git hooks/post-receive script.
dgoulet pushed a commit to branch main in repository tor.
commit 7ce3edaee0eaf93f2f8fa60b859cf49118b9523b Author: David Goulet dgoulet@torproject.org AuthorDate: Wed Mar 2 13:20:07 2022 -0500
rephist: Introduce a fraction and period for overload onionskin
This code was heavily reused from the previous DNS timeout work done in ticket #40491 that was removed afterall from our code.
Closes #40560
Signed-off-by: David Goulet dgoulet@torproject.org --- src/feature/relay/onion_queue.c | 2 - src/feature/stats/rephist.c | 208 +++++++++++++++++++++++++++++++++++----- src/feature/stats/rephist.h | 10 ++ 3 files changed, 195 insertions(+), 25 deletions(-)
diff --git a/src/feature/relay/onion_queue.c b/src/feature/relay/onion_queue.c index 85ec0dc74a..c4023509ad 100644 --- a/src/feature/relay/onion_queue.c +++ b/src/feature/relay/onion_queue.c @@ -166,8 +166,6 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin) RATELIM_INIT(WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL); if (onionskin->handshake_type == ONION_HANDSHAKE_TYPE_NTOR) { char *m; - /* Note this ntor onionskin drop as an overload */ - rep_hist_note_overload(OVERLOAD_GENERAL); if ((m = rate_limit_log(&last_warned, approx_time()))) { log_warn(LD_GENERAL, "Your computer is too slow to handle this many circuit " diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c index 2bfa14d326..ae4420ae50 100644 --- a/src/feature/stats/rephist.c +++ b/src/feature/stats/rephist.c @@ -291,25 +291,6 @@ overload_general_dns_assessment(void) approx_time() + overload_dns_timeout_period_secs; }
-/** Called just before the consensus will be replaced. Update the consensus - * parameters in case they changed. */ -void -rep_hist_consensus_has_changed(const networkstatus_t *ns) -{ - overload_dns_timeout_fraction = - networkstatus_get_param(ns, "overload_dns_timeout_scale_percent", - OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT, - OVERLOAD_DNS_TIMEOUT_PERCENT_MIN, - OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) / - OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0; - - overload_dns_timeout_period_secs = - networkstatus_get_param(ns, "overload_dns_timeout_period_secs", - OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT, - OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN, - OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX); -} - /** Note a DNS error for the given given libevent DNS record type and error * code. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. * @@ -1913,12 +1894,144 @@ STATIC int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1] = {0}; STATIC int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1] = {0}; /**@}*/
+/** Counters keeping the same stats as above but for the entire duration of the + * process (not reset). */ +static uint64_t stats_n_onionskin_assigned[MAX_ONION_STAT_TYPE+1] = {0}; +static uint64_t stats_n_onionskin_dropped[MAX_ONION_STAT_TYPE+1] = {0}; + +/* We use a scale here so we can represent percentages with decimal points by + * scaling the value by this factor and so 0.5% becomes a value of 500. + * Default is 1% and thus min and max range is 0 to 100%. */ +#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE 1000.0 +#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT 1000 +#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN 0 +#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX 100000 + +/** Consensus parameter: indicate what fraction of ntor onionskin drop over the + * total number of requests must be reached before we trigger a general + * overload signal.*/ +static double overload_onionskin_ntor_fraction = + OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT / + OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0; + +/* Number of seconds for the assessment period. Default is 6 hours (21600) and + * the min max range is within a 32bit value. We align this period to the + * Heartbeat so the logs would match this period more or less. */ +#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT (60 * 60 * 6) +#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN 0 +#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX INT32_MAX + +/** Consensus parameter: Period, in seconds, over which we count the number of + * ntor onionskins requests and how many were dropped. After that period, we + * assess if we trigger an overload or not. */ +static int32_t overload_onionskin_ntor_period_secs = + OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT; + +/** Structure containing information for an assessment period of the onionskin + * drop overload general signal. + * + * It is used to track, within a time period, how many requests we've gotten + * and how many were dropped. The overload general signal is decided from these + * depending on some consensus parameters. */ +typedef struct { + /** Total number of ntor onionskin requested for an assessment period. */ + uint64_t n_ntor_requested; + + /** Total number of dropped ntor onionskins for an assessment period. */ + uint64_t n_ntor_dropped; + + /** When is the next assessment time of the general overload for ntor + * onionskin drop. Once this time is reached, all stats are reset and this + * time is set to the next assessment time. */ + time_t next_assessment_time; +} overload_onionskin_assessment_t; + +/** Keep track of the onionskin requests for an assessment period. */ +static overload_onionskin_assessment_t overload_onionskin_assessment; + +/** + * We combine ntorv3 and ntor into the same stat, so we must + * use this function to covert the cell type to a stat index. + */ +static inline uint16_t +onionskin_type_to_stat(uint16_t type) +{ + if (BUG(type > MAX_ONION_STAT_TYPE)) { + return MAX_ONION_STAT_TYPE; // use ntor if out of range + } + + return type; +} + +/** Assess our ntor handshake statistics and decide if we need to emit a + * general overload signal. + * + * Regardless of overloaded or not, if the assessment time period has passed, + * the stats are reset back to 0 and the assessment time period updated. + * + * This is called when a ntor handshake is _requested_ because we want to avoid + * to have an assymetric situation where requested counter is reset to 0 but + * then a drop happens leading to the drop counter being incremented while the + * requested counter is 0. */ +static void +overload_general_onionskin_assessment(void) +{ + /* Initialize the time. Should be done once. */ + if (overload_onionskin_assessment.next_assessment_time == 0) { + goto reset; + } + + /* Not the time yet. */ + if (overload_onionskin_assessment.next_assessment_time > approx_time()) { + goto done; + } + + /* Make sure we have enough requests to be able to make a proper assessment. + * We want to avoid 1 single request/drop to trigger an overload as we want + * at least the number of requests to be above the scale of our fraction. */ + if (overload_onionskin_assessment.n_ntor_requested < + OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE) { + goto done; + } + + /* Lets see if we can signal a general overload. */ + double fraction = (double) overload_onionskin_assessment.n_ntor_dropped / + (double) overload_onionskin_assessment.n_ntor_requested; + if (fraction >= overload_onionskin_ntor_fraction) { + log_notice(LD_HIST, "General overload -> Ntor dropped (%" PRIu64 ") " + "fraction %.4f%% is above threshold of %.4f%%", + overload_onionskin_assessment.n_ntor_dropped, + fraction * 100.0, + overload_onionskin_ntor_fraction * 100.0); + rep_hist_note_overload(OVERLOAD_GENERAL); + } + + reset: + /* Reset counters for the next period. */ + overload_onionskin_assessment.n_ntor_dropped = 0; + overload_onionskin_assessment.n_ntor_requested = 0; + overload_onionskin_assessment.next_assessment_time = + approx_time() + overload_onionskin_ntor_period_secs; + + done: + return; +} + /** A new onionskin (using the <b>type</b> handshake) has arrived. */ void rep_hist_note_circuit_handshake_requested(uint16_t type) { - if (type <= MAX_ONION_HANDSHAKE_TYPE) - onion_handshakes_requested[type]++; + uint16_t stat = onionskin_type_to_stat(type); + + onion_handshakes_requested[stat]++; + + /* Only relays get to record requested onionskins. */ + if (stat == ONION_HANDSHAKE_TYPE_NTOR) { + /* Assess if we've reached the overload general signal. */ + overload_general_onionskin_assessment(); + + overload_onionskin_assessment.n_ntor_requested++; + } }
/** We've sent an onionskin (using the <b>type</b> handshake) to a @@ -1926,8 +2039,24 @@ rep_hist_note_circuit_handshake_requested(uint16_t type) void rep_hist_note_circuit_handshake_assigned(uint16_t type) { - if (type <= MAX_ONION_HANDSHAKE_TYPE) - onion_handshakes_assigned[type]++; + onion_handshakes_assigned[onionskin_type_to_stat(type)]++; + stats_n_onionskin_assigned[onionskin_type_to_stat(type)]++; +} + +/** We've just drop an onionskin (using the <b>type</b> handshake) due to being + * overloaded. */ +void +rep_hist_note_circuit_handshake_dropped(uint16_t type) +{ + uint16_t stat = onionskin_type_to_stat(type); + + stats_n_onionskin_dropped[stat]++; + + /* Only relays get to record requested onionskins. */ + if (stat == ONION_HANDSHAKE_TYPE_NTOR) { + /* Note the dropped ntor in the overload assessment object. */ + overload_onionskin_assessment.n_ntor_dropped++; + } }
/** Get the circuit handshake value that is requested. */ @@ -2522,6 +2651,39 @@ rep_hist_free_all(void) tor_assert_nonfatal_once(rephist_total_num == 0); }
+/** Called just before the consensus will be replaced. Update the consensus + * parameters in case they changed. */ +void +rep_hist_consensus_has_changed(const networkstatus_t *ns) +{ + overload_onionskin_ntor_fraction = + networkstatus_get_param(ns, "overload_onionskin_ntor_scale_percent", + OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT, + OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN, + OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX) / + OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0; + + overload_onionskin_ntor_period_secs = + networkstatus_get_param(ns, "overload_onionskin_ntor_period_secs", + OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT, + OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN, + OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX); + + /** XXX: Unused parameters. */ + overload_dns_timeout_fraction = + networkstatus_get_param(ns, "overload_dns_timeout_scale_percent", + OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT, + OVERLOAD_DNS_TIMEOUT_PERCENT_MIN, + OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) / + OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0; + + overload_dns_timeout_period_secs = + networkstatus_get_param(ns, "overload_dns_timeout_period_secs", + OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT, + OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN, + OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX); +} + #ifdef TOR_UNIT_TESTS /* only exists for unit tests: get HSv2 stats object */ const hs_v2_stats_t * diff --git a/src/feature/stats/rephist.h b/src/feature/stats/rephist.h index 749b4996a8..891827c8e6 100644 --- a/src/feature/stats/rephist.h +++ b/src/feature/stats/rephist.h @@ -58,6 +58,7 @@ time_t rep_hist_desc_stats_write(time_t now);
void rep_hist_note_circuit_handshake_requested(uint16_t type); void rep_hist_note_circuit_handshake_assigned(uint16_t type); +void rep_hist_note_circuit_handshake_dropped(uint16_t type); void rep_hist_log_circuit_handshake_stats(time_t now);
MOCK_DECL(int, rep_hist_get_circuit_handshake_requested, (uint16_t type)); @@ -79,8 +80,17 @@ void rep_hist_free_all(void); void rep_hist_note_negotiated_link_proto(unsigned link_proto, int started_here); void rep_hist_log_link_protocol_counts(void); + +uint64_t rep_hist_get_n_dns_error(int type, uint8_t error); +uint64_t rep_hist_get_n_dns_request(int type); +void rep_hist_note_dns_request(int type); +void rep_hist_note_dns_error(int type, uint8_t error); + void rep_hist_consensus_has_changed(const networkstatus_t *ns);
+/** We have 3 stat types: tap, fast, and ntor. The max type is ntor (2) */ +#define MAX_ONION_STAT_TYPE ONION_HANDSHAKE_TYPE_NTOR + extern uint64_t rephist_total_alloc; extern uint32_t rephist_total_num; #ifdef TOR_UNIT_TESTS
This is an automated email from the git hooks/post-receive script.
dgoulet pushed a commit to branch main in repository tor.
commit b4890e753459bb1aeb1c258e98fdb2c598bb1369 Author: David Goulet dgoulet@torproject.org AuthorDate: Tue Mar 15 13:28:04 2022 -0400
test: Unit tests for overload onionskin ntor
Part of #40560
Signed-off-by: David Goulet dgoulet@torproject.org --- src/test/test_stats.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+)
diff --git a/src/test/test_stats.c b/src/test/test_stats.c index af035ae54c..5b40afc64e 100644 --- a/src/test/test_stats.c +++ b/src/test/test_stats.c @@ -867,6 +867,65 @@ test_overload_stats(void *arg) tor_free(stats_str); }
+/** Test the overload stats logic. */ +static void +test_overload_onionskin_ntor(void *arg) +{ + char *stats_str = NULL; + (void) arg; + uint16_t type = ONION_HANDSHAKE_TYPE_NTOR; + + /* Lets simulate a series of timeouts but below our default 1% threshold. */ + + for (int i = 0; i < 1000; i++) { + rep_hist_note_circuit_handshake_requested(type); + /* This should trigger 9 drop which is just below 1% (10) */ + if (i > 0 && !(i % 100)) { + rep_hist_note_circuit_handshake_dropped(type); + } + } + + /* No overload yet. */ + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + /* Move it 6 hours in the future and see if we get a general overload. */ + update_approx_time(approx_time() + 21600); + + /* This request should NOT trigger the general overload because we are below + * our default of 1%. */ + rep_hist_note_circuit_handshake_requested(type); + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + /* We'll now go above our 1% threshold. */ + for (int i = 0; i < 1000; i++) { + rep_hist_note_circuit_handshake_requested(type); + /* This should trigger 10 timeouts which is our threshold of 1% (10) */ + if (!(i % 10)) { + rep_hist_note_circuit_handshake_dropped(type); + } + } + + /* Move it 6 hours in the future and see if we get a general overload. */ + update_approx_time(approx_time() + 21600); + + /* This request should trigger the general overload because above 1%. */ + rep_hist_note_circuit_handshake_requested(type); + stats_str = rep_hist_get_overload_general_line(); + tt_assert(stats_str); + tor_free(stats_str); + + /* Move 72h in the future, we should NOT get an overload anymore. */ + update_approx_time(approx_time() + (72 * 3600)); + + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + done: + tor_free(stats_str); +} + #define ENT(name) \ { #name, test_ ## name , 0, NULL, NULL } #define FORK(name) \ @@ -883,6 +942,7 @@ struct testcase_t stats_tests[] = { FORK(rephist_v3_onions), FORK(load_stats_file), FORK(overload_stats), + FORK(overload_onionskin_ntor),
END_OF_TESTCASES };
This is an automated email from the git hooks/post-receive script.
dgoulet pushed a commit to branch main in repository tor.
commit 687df93a7526e7c867dd4a35eaea689b26ef6870 Merge: 7747469dbe b4890e7534 Author: David Goulet dgoulet@torproject.org AuthorDate: Thu Apr 14 09:25:25 2022 -0400
Merge branch 'maint-0.4.6'
tor-commits@lists.torproject.org