commit d4f4108601fe7e2614f30055ff5abe00460f6a12 Author: Alexander Færøy ahf@torproject.org Date: Sat Dec 16 02:41:21 2017 +0100
Add MainloopStats option.
This patch adds support for MainloopStats that allow developers to get main event loop statistics via Tor's heartbeat status messages. The new status log message will show how many succesful, erroneous, and idle event loop iterations we have had.
See: https://bugs.torproject.org/24605 --- changes/bug24605 | 4 +++ doc/tor.1.txt | 5 ++++ src/or/config.c | 5 ++++ src/or/main.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/or/main.h | 5 ++++ src/or/or.h | 2 ++ src/or/status.c | 14 ++++++++++ 7 files changed, 117 insertions(+), 1 deletion(-)
diff --git a/changes/bug24605 b/changes/bug24605 new file mode 100644 index 000000000..7ee292b27 --- /dev/null +++ b/changes/bug24605 @@ -0,0 +1,4 @@ + o Minor features (instrumentation): + - Add the MainloopStats option to Tor that allows developers to get + instrumentation information from the main event loop via the heartbeat + messages. Closes ticket 24605. diff --git a/doc/tor.1.txt b/doc/tor.1.txt index b2218e33c..64114d6e6 100644 --- a/doc/tor.1.txt +++ b/doc/tor.1.txt @@ -2032,6 +2032,11 @@ is non-zero): to 0 will disable the heartbeat. Otherwise, it must be at least 30 minutes. (Default: 6 hours)
+[[MainloopStats]] **MainloopStats** **0**|**1**:: + Log main loop statistics every **HeartbeatPeriod** seconds. This is a log + level __notice__ message designed to help developers instrumenting Tor's + main event loop. (Default: 0) + [[AccountingMax]] **AccountingMax** __N__ **bytes**|**KBytes**|**MBytes**|**GBytes**|**TBytes**|**KBits**|**MBits**|**GBits**|**TBits**:: Limits the max number of bytes sent and received within a set time period using a given calculation rule (see: AccountingStart, AccountingRule). diff --git a/src/or/config.c b/src/or/config.c index fa80dad57..3ae3af55a 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -360,6 +360,7 @@ static config_var_t option_vars_[] = { V(GuardLifetime, INTERVAL, "0 minutes"), V(HardwareAccel, BOOL, "0"), V(HeartbeatPeriod, INTERVAL, "6 hours"), + V(MainloopStats, BOOL, "0"), V(AccelName, STRING, NULL), V(AccelDir, FILENAME, NULL), V(HashedControlPassword, LINELIST, NULL), @@ -2157,6 +2158,10 @@ options_act(const or_options_t *old_options) if (options->PerConnBWRate != old_options->PerConnBWRate || options->PerConnBWBurst != old_options->PerConnBWBurst) connection_or_update_token_buckets(get_connection_array(), options); + + if (options->MainloopStats != old_options->MainloopStats) { + reset_main_loop_counters(); + } }
/* Only collect directory-request statistics on relays and bridges. */ diff --git a/src/or/main.c b/src/or/main.c index aae98dd8a..7c6ab545f 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -179,6 +179,12 @@ static uint64_t stats_n_bytes_written = 0; time_t time_of_process_start = 0; /** How many seconds have we been running? */ long stats_n_seconds_working = 0; +/** How many times have we returned from the main loop successfully? */ +static uint64_t stats_n_main_loop_successes = 0; +/** How many times have we received an error from the main loop? */ +static uint64_t stats_n_main_loop_errors = 0; +/** How many times have we returned from the main loop with no events. */ +static uint64_t stats_n_main_loop_idle = 0;
/** How often will we honor SIGNEWNYM requests? */ #define MAX_SIGNEWNYM_RATE 10 @@ -493,6 +499,57 @@ connection_is_reading(connection_t *conn) (conn->read_event && event_pending(conn->read_event, EV_READ, NULL)); }
+/** Reset our main loop counters. */ +void +reset_main_loop_counters(void) +{ + stats_n_main_loop_successes = 0; + stats_n_main_loop_errors = 0; + stats_n_main_loop_idle = 0; +} + +/** Increment the main loop success counter. */ +static void +increment_main_loop_success_count(void) +{ + ++stats_n_main_loop_successes; +} + +/** Get the main loop success counter. */ +uint64_t +get_main_loop_success_count(void) +{ + return stats_n_main_loop_successes; +} + +/** Increment the main loop error counter. */ +static void +increment_main_loop_error_count(void) +{ + ++stats_n_main_loop_errors; +} + +/** Get the main loop error counter. */ +uint64_t +get_main_loop_error_count(void) +{ + return stats_n_main_loop_errors; +} + +/** Increment the main loop idle counter. */ +static void +increment_main_loop_idle_count(void) +{ + ++stats_n_main_loop_idle; +} + +/** Get the main loop idle counter. */ +uint64_t +get_main_loop_idle_count(void) +{ + return stats_n_main_loop_idle; +} + /** Check whether <b>conn</b> is correct in having (or not having) a * read/write event (passed in <b>ev</b>). On success, return 0. On failure, * log a warning and return -1. */ @@ -2693,6 +2750,8 @@ run_main_loop_once(void) if (main_loop_should_exit) return 0;
+ const or_options_t *options = get_options(); + #ifndef _WIN32 /* Make it easier to tell whether libevent failure is our fault or not. */ errno = 0; @@ -2702,7 +2761,14 @@ run_main_loop_once(void) * so that libevent knows to run their callbacks. */ SMARTLIST_FOREACH(active_linked_connection_lst, connection_t *, conn, event_active(conn->read_event, EV_READ, 1)); - called_loop_once = smartlist_len(active_linked_connection_lst) ? 1 : 0; + + if (options->MainloopStats) { + /* We always enforce that EVLOOP_ONCE is passed to event_base_loop() if we + * are collecting main loop statistics. */ + called_loop_once = 1; + } else { + called_loop_once = smartlist_len(active_linked_connection_lst) ? 1 : 0; + }
/* Make sure we know (about) what time it is. */ update_approx_time(time(NULL)); @@ -2714,6 +2780,21 @@ run_main_loop_once(void) loop_result = event_base_loop(tor_libevent_get_base(), called_loop_once ? EVLOOP_ONCE : 0);
+ if (options->MainloopStats) { + /* Update our main loop counters. */ + if (loop_result == 0) { + // The call was succesful. + increment_main_loop_success_count(); + } else if (loop_result == -1) { + // The call was erroneous. + increment_main_loop_error_count(); + } else if (loop_result == 1) { + // The call didn't have any active or pending events + // to handle. + increment_main_loop_idle_count(); + } + } + /* Oh, the loop failed. That might be an error that we need to * catch, but more likely, it's just an interrupted poll() call or something, * and we should try again. */ diff --git a/src/or/main.h b/src/or/main.h index 8eb977575..f6346b65d 100644 --- a/src/or/main.h +++ b/src/or/main.h @@ -79,6 +79,11 @@ void tor_free_all(int postfork); int do_main_loop(void); int tor_init(int argc, char **argv);
+void reset_main_loop_counters(void); +uint64_t get_main_loop_success_count(void); +uint64_t get_main_loop_error_count(void); +uint64_t get_main_loop_idle_count(void); + extern time_t time_of_process_start; extern long stats_n_seconds_working; extern int quiet_level; diff --git a/src/or/or.h b/src/or/or.h index aff43f195..69f46cede 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3972,6 +3972,8 @@ typedef struct {
int HeartbeatPeriod; /**< Log heartbeat messages after this many seconds * have passed. */ + int MainloopStats; /**< Log main loop statistics as part of the + * heartbeat messages. */
char *HTTPProxy; /**< hostname[:port] to use as http proxy, if any. */ tor_addr_t HTTPProxyAddr; /**< Parsed IPv4 addr for http proxy, if any. */ diff --git a/src/or/status.c b/src/or/status.c index f7be41e41..1a8885e83 100644 --- a/src/or/status.c +++ b/src/or/status.c @@ -157,6 +157,20 @@ log_heartbeat(time_t now) tor_free(msg); }
+ if (options->MainloopStats) { + const uint64_t main_loop_success_count = get_main_loop_success_count(); + const uint64_t main_loop_error_count = get_main_loop_error_count(); + const uint64_t main_loop_idle_count = get_main_loop_idle_count(); + + log_fn(LOG_NOTICE, LD_HEARTBEAT, "Main event loop statistics: " + U64_FORMAT " succesful returns, " + U64_FORMAT " erroneous returns, and " + U64_FORMAT " idle returns.", + U64_PRINTF_ARG(main_loop_success_count), + U64_PRINTF_ARG(main_loop_error_count), + U64_PRINTF_ARG(main_loop_idle_count)); + } + tor_free(uptime); tor_free(bw_sent); tor_free(bw_rcvd);
tor-commits@lists.torproject.org