commit d438cf1ec9d5de08b8a8fffd7c38b66134fd337c Author: Andrea Shepard andrea@torproject.org Date: Sun Aug 25 08:45:07 2013 -0700
Implement scheduler mechanism to track lists of channels wanting cells or writes; doesn't actually drive the cell flow from it yet --- src/common/torlog.h | 4 +- src/or/Makefile.nmake | 1 + src/or/channel.c | 19 +++ src/or/channeltls.c | 5 + src/or/connection_or.c | 37 +++++ src/or/connection_or.h | 1 + src/or/include.am | 2 + src/or/main.c | 10 ++ src/or/relay.c | 5 + src/or/scheduler.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++++ src/or/scheduler.h | 31 ++++ 11 files changed, 525 insertions(+), 1 deletion(-)
diff --git a/src/common/torlog.h b/src/common/torlog.h index 34e39b4..2ae8aa9 100644 --- a/src/common/torlog.h +++ b/src/common/torlog.h @@ -97,8 +97,10 @@ #define LD_HEARTBEAT (1u<<20) /** Abstract channel_t code */ #define LD_CHANNEL (1u<<21) +/** Scheduler */ +#define LD_SCHED (1u<<22) /** Number of logging domains in the code. */ -#define N_LOGGING_DOMAINS 22 +#define N_LOGGING_DOMAINS 23
/** This log message is not safe to send to a callback-based logger * immediately. Used as a flag, not a log domain. */ diff --git a/src/or/Makefile.nmake b/src/or/Makefile.nmake index 523bf33..2ac98cd 100644 --- a/src/or/Makefile.nmake +++ b/src/or/Makefile.nmake @@ -63,6 +63,7 @@ LIBTOR_OBJECTS = \ routerlist.obj \ routerparse.obj \ routerset.obj \ + scheduler.obj \ statefile.obj \ status.obj \ transports.obj diff --git a/src/or/channel.c b/src/or/channel.c index c8c9263..da2493a 100644 --- a/src/or/channel.c +++ b/src/or/channel.c @@ -29,6 +29,7 @@ #include "rephist.h" #include "router.h" #include "routerlist.h" +#include "scheduler.h"
/* Cell queue structure */
@@ -788,6 +789,9 @@ channel_free(channel_t *chan) "Freeing channel " U64_FORMAT " at %p", U64_PRINTF_ARG(chan->global_identifier), chan);
+ /* Get this one out of the scheduler */ + scheduler_release_channel(chan); + /* * Get rid of cmux policy before we do anything, so cmux policies don't * see channels in weird half-freed states. @@ -863,6 +867,9 @@ channel_force_free(channel_t *chan) "Force-freeing channel " U64_FORMAT " at %p", U64_PRINTF_ARG(chan->global_identifier), chan);
+ /* Get this one out of the scheduler */ + scheduler_release_channel(chan); + /* * Get rid of cmux policy before we do anything, so cmux policies don't * see channels in weird half-freed states. @@ -1941,6 +1948,18 @@ channel_change_state(channel_t *chan, channel_state_t to_state) } }
+ /* + * If we're going to a closed/closing state, we don't need scheduling any + * more; in CHANNEL_STATE_MAINT we can't accept writes. + */ + if (to_state == CHANNEL_STATE_CLOSING || + to_state == CHANNEL_STATE_CLOSED || + to_state == CHANNEL_STATE_ERROR) { + scheduler_release_channel(chan); + } else if (to_state == CHANNEL_STATE_MAINT) { + scheduler_channel_doesnt_want_writes(chan); + } + /* Tell circuits if we opened and stuff */ if (to_state == CHANNEL_STATE_OPEN) { channel_do_open_actions(chan); diff --git a/src/or/channeltls.c b/src/or/channeltls.c index 245e335..af1aab3 100644 --- a/src/or/channeltls.c +++ b/src/or/channeltls.c @@ -25,6 +25,7 @@ #include "relay.h" #include "router.h" #include "routerlist.h" +#include "scheduler.h"
/** How many CELL_PADDING cells have we received, ever? */ uint64_t stats_n_padding_cells_processed = 0; @@ -867,6 +868,10 @@ channel_tls_handle_state_change_on_orconn(channel_tls_t *chan, * CHANNEL_STATE_MAINT on this. */ channel_change_state(base_chan, CHANNEL_STATE_OPEN); + /* We might have just become writeable; check and tell the scheduler */ + if (connection_or_num_cells_writeable(conn) > 0) { + scheduler_channel_wants_writes(base_chan); + } } else { /* * Not open, so from CHANNEL_STATE_OPEN we go to CHANNEL_STATE_MAINT, diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 7fcc5b2..9074c0a 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -38,6 +38,8 @@ #include "router.h" #include "routerlist.h" #include "ext_orport.h" +#include "scheduler.h" + #ifdef USE_BUFFEREVENTS #include <event2/bufferevent_ssl.h> #endif @@ -595,6 +597,17 @@ connection_or_flushed_some(or_connection_t *conn) * high water mark. */ datalen = connection_get_outbuf_len(TO_CONN(conn)); if (datalen < OR_CONN_LOWWATER) { + /* Let the scheduler know */ + scheduler_channel_wants_writes(TLS_CHAN_TO_BASE(conn->chan)); + + /* + * TODO this will be done from the scheduler, so it will + * need a generic way to ask how many cells a channel can + * accept and if it still wants writes or not to know how + * to account for it in the case that it runs out of cells + * to send first. + */ + while ((conn->chan) && channel_tls_more_to_flush(conn->chan)) { /* Compute how many more cells we want at most */ n = CEIL_DIV(OR_CONN_HIGHWATER - datalen, cell_network_size); @@ -616,6 +629,30 @@ connection_or_flushed_some(or_connection_t *conn) return 0; }
+/** This is for channeltls.c to ask how many cells we could accept if + * they were available. */ +ssize_t +connection_or_num_cells_writeable(or_connection_t *conn) +{ + size_t datalen, cell_network_size; + ssize_t n = 0; + + tor_assert(conn); + + /* + * If we're under the high water mark, we're potentially + * writeable; note this is different from the calculation above + * used to trigger when to start writing after we've stopped. + */ + datalen = connection_get_outbuf_len(TO_CONN(conn)); + if (datalen < OR_CONN_HIGHWATER) { + cell_network_size = get_cell_network_size(conn->wide_circ_ids); + n = CEIL_DIV(OR_CONN_HIGHWATER - datalen, cell_network_size); + } + + return n; +} + /** Connection <b>conn</b> has finished writing and has no bytes left on * its outbuf. * diff --git a/src/or/connection_or.h b/src/or/connection_or.h index 143540e..7b46d7d 100644 --- a/src/or/connection_or.h +++ b/src/or/connection_or.h @@ -24,6 +24,7 @@ void connection_or_set_bad_connections(const char *digest, int force); void connection_or_block_renegotiation(or_connection_t *conn); int connection_or_reached_eof(or_connection_t *conn); int connection_or_process_inbuf(or_connection_t *conn); +ssize_t connection_or_num_cells_writeable(or_connection_t *conn); int connection_or_flushed_some(or_connection_t *conn); int connection_or_finished_flushing(or_connection_t *conn); int connection_or_finished_connecting(or_connection_t *conn); diff --git a/src/or/include.am b/src/or/include.am index 47bdd09..b2b9406 100644 --- a/src/or/include.am +++ b/src/or/include.am @@ -80,6 +80,7 @@ LIBTOR_A_SOURCES = \ src/or/routerlist.c \ src/or/routerparse.c \ src/or/routerset.c \ + src/or/scheduler.c \ src/or/statefile.c \ src/or/status.c \ $(evdns_source) \ @@ -185,6 +186,7 @@ ORHEADERS = \ src/or/routerlist.h \ src/or/routerset.h \ src/or/routerparse.h \ + src/or/scheduler.h \ src/or/statefile.h \ src/or/status.h
diff --git a/src/or/main.c b/src/or/main.c index 094120f..1a2cfad 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -52,6 +52,7 @@ #include "router.h" #include "routerlist.h" #include "routerparse.h" +#include "scheduler.h" #include "statefile.h" #include "status.h" #include "util_process.h" @@ -2455,6 +2456,14 @@ tor_init(int argc, char *argv[]) log_warn(LD_NET, "Problem initializing libevent RNG."); }
+ /* + * Initialize the scheduler - this has to come after + * options_init_from_torrc() sets up libevent - why yes, that seems + * completely sensible to hide the libevent setup in the option parsing + * code! + */ + scheduler_init(); + return 0; }
@@ -2552,6 +2561,7 @@ tor_free_all(int postfork) channel_tls_free_all(); channel_free_all(); connection_free_all(); + scheduler_free_all(); buf_shrink_freelists(1); memarea_clear_freelist(); nodelist_free_all(); diff --git a/src/or/relay.c b/src/or/relay.c index d97c84f..2c16e4a 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -39,6 +39,7 @@ #include "router.h" #include "routerlist.h" #include "routerparse.h" +#include "scheduler.h"
static edge_connection_t *relay_lookup_conn(circuit_t *circ, cell_t *cell, cell_direction_t cell_direction, @@ -2868,6 +2869,10 @@ append_cell_to_circuit_queue(circuit_t *circ, channel_t *chan, log_debug(LD_GENERAL, "Made a circuit active."); }
+ /* New way: mark this as having waiting cells for the scheduler */ + scheduler_channel_has_waiting_cells(chan); + + /* TODO remove this once scheduler does it */ if (!channel_has_queued_writes(chan)) { /* There is no data at all waiting to be sent on the outbuf. Add a * cell, so that we can notice when it gets flushed, flushed_some can diff --git a/src/or/scheduler.c b/src/or/scheduler.c new file mode 100644 index 0000000..451634a --- /dev/null +++ b/src/or/scheduler.c @@ -0,0 +1,411 @@ +/* * Copyright (c) 2013, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file scheduler.c + * \brief Relay scheduling system + **/ + +#include "or.h" +#include "channel.h" +#include "compat_libevent.h" +#include "scheduler.h" + +#ifdef HAVE_EVENT2_EVENT_H +#include <event2/event.h> +#else +#include <event.h> +#endif + +/* + * Write scheduling works by keeping track of lists of channels that can + * accept cells, and have cells to write. From the scheduler's perspective, + * a channel can be in four possible states: + * + * 1.) Not open for writes, no cells to send + * - Not much to do here, and the channel will appear in neither list. + * - Transitions from: + * - Open for writes/has cells by simultaneously draining all circuit + * queues and filling the output buffer. + * - Transitions to: + * - Not open for writes/has cells by arrival of cells on an attached + * circuit (this would be driven from append_cell_to_circuit_queue()) + * - Open for writes/no cells by a channel type specific path; + * driven from connection_or_flushed_some() for channel_tls_t. + * + * 2.) Open for writes, no cells to send + * - Not much here either; this will be the state an idle but open channel + * can be expected to settle in. + * - Transitions from: + * - Not open for writes/no cells by flushing some of the output + * buffer. 
+ * - Open for writes/has cells by the scheduler moving cells from + * circuit queues to channel output queue, but not having enough + * to fill the output queue. + * - Transitions to: + * - Open for writes/has cells by arrival of new cells on an attached + * circuit, in append_cell_to_circuit_queue() + * + * 3.) Not open for writes, cells to send + * - This is the state of a busy circuit limited by output bandwidth; + * cells have piled up in the circuit queues waiting to be relayed. + * - Transitions from: + * - Not open for writes/no cells by arrival of cells on an attached + * circuit + * - Open for writes/has cells by filling an output buffer without + * draining all cells from attached circuits + * - Transitions to: + * - Open for writes/has cells by draining some of the output buffer + * via the connection_or_flushed_some() path (for channel_tls_t). + * + * 4.) Open for writes, cells to send + * - This connection is ready to relay some cells and waiting for + * the scheduler to choose it + * - Transitions from: + * - Not open for writes/has cells by the connection_or_flushed_some() + * path + * - Open for writes/no cells by the append_cell_to_circuit_queue() + * path + * - Transitions to: + * - Not open for writes/no cells by draining all circuit queues and + * simultaneously filling the output buffer. + * - Not open for writes/has cells by writing enough cells to fill the + * output buffer + * - Open for writes/no cells by draining all attached circuit queues + * without also filling the output buffer + * + * Other event-driven parts of the code move channels between these scheduling + * states by calling scheduler functions; the scheduler only runs on open-for- + * writes/has-cells channels and is the only path for those to transition to + * other states. 
The scheduler_run() function gives us the opportunity to do + * scheduling work, and is called from other scheduler functions whenever a + * state transition occurs, and periodically from the main event loop. + */ + +/* Scheduler global data structures */ + +/* + * We keep lists of channels that either have cells queued, can accept + * writes, or both (states 2, 3 and 4 above) - no explicit list of state + * 1 channels is kept, so we don't have to worry about registering new + * channels here or anything. The scheduler will learn about them when + * it needs to. We can check how many channels are in state 4 in O(1), so + * the test whether we have anything to do in scheduler_run() is fast + * and there's no harm in calling it opportunistically whenever we get + * the chance. + * + * Note that it takes time O(n) to search for a channel in these smartlists + * or move one; I don't think the number of channels on a relay will be large + * enough for this to be a severe problem, but this would benefit from using + * a doubly-linked list rather than smartlist_t, together with a hash map from + * channel identifiers to pointers to list entries, so we can perform those + * operations in O(log(n)). + */ + +/* List of channels that can write but have no cells (state 2 above) */ +static smartlist_t *channels_waiting_for_cells = NULL; + +/* List of channels with cells waiting to write (state 3 above) */ +static smartlist_t *channels_waiting_to_write = NULL; + +/* List of channels that can write and have cells (pending work) */ +static smartlist_t *channels_pending = NULL; + +/* + * This event runs the scheduler from its callback, and is manually + * activated whenever a channel enters open for writes/cells to send. 
+ */ + +static struct event *run_sched_ev = NULL; +static struct timeval run_sched_tv; + +/* Scheduler static function declarations */ + +static void scheduler_evt_callback(evutil_socket_t fd, + short events, void *arg); +static int scheduler_more_work(void); +static void scheduler_retrigger(void); +static void scheduler_trigger(void); + +/* Scheduler function implementations */ + +/** Free everything and shut down the scheduling system */ + +void +scheduler_free_all(void) +{ + log_debug(LD_SCHED, "Shutting down scheduler"); + + if (run_sched_ev) { + event_del(run_sched_ev); + tor_event_free(run_sched_ev); + run_sched_ev = NULL; + } + + if (channels_waiting_for_cells) { + smartlist_free(channels_waiting_for_cells); + channels_waiting_for_cells = NULL; + } + + if (channels_waiting_to_write) { + smartlist_free(channels_waiting_to_write); + channels_waiting_to_write = NULL; + } + + if (channels_pending) { + smartlist_free(channels_pending); + channels_pending = NULL; + } +} + +/* + * Scheduler event callback; this should get triggered once per event loop + * if any scheduling work was created during the event loop. + */ + +static void +scheduler_evt_callback(evutil_socket_t fd, short events, void *arg) +{ + log_debug(LD_SCHED, "Scheduler event callback called"); + + tor_assert(run_sched_ev); + + /* Run the scheduler */ + scheduler_run(); + + /* Do we have more work to do? */ + if (scheduler_more_work()) scheduler_retrigger(); +} + +/** Mark a channel as no longer ready to accept writes */ + +void +scheduler_channel_doesnt_want_writes(channel_t *chan) +{ + tor_assert(chan); + tor_assert(channels_waiting_for_cells); + tor_assert(channels_waiting_to_write); + tor_assert(channels_pending); + + /* If it's already in pending, we can put it in waiting_to_write */ + if (smartlist_contains(channels_pending, chan)) { + /* + * It's in channels_pending, so it shouldn't be in any of + * the other lists. It can't write any more, so it goes to + * channels_waiting_to_write. 
+ */ + smartlist_remove(channels_pending, chan); + smartlist_add(channels_waiting_to_write, chan); + log_debug(LD_SCHED, + "Channel " U64_FORMAT " at %p went from pending " + "to waiting_to_write", + U64_PRINTF_ARG(chan->global_identifier), chan); + } else { + /* + * It's not in pending, so it can't become waiting_to_write; it's + * either not in any of the lists (nothing to do) or it's already in + * waiting_for_cells (remove it, can't write any more). + */ + if (smartlist_contains(channels_waiting_for_cells, chan)) { + smartlist_remove(channels_waiting_for_cells, chan); + log_debug(LD_SCHED, + "Channel " U64_FORMAT " at %p left waiting_for_cells", + U64_PRINTF_ARG(chan->global_identifier), chan); + } + } +} + +/** Mark a channel as having waiting cells */ + +void +scheduler_channel_has_waiting_cells(channel_t *chan) +{ + int became_pending = 0; + + tor_assert(chan); + tor_assert(channels_waiting_for_cells); + tor_assert(channels_waiting_to_write); + tor_assert(channels_pending); + + /* First, check if this one also writeable */ + if (smartlist_contains(channels_waiting_for_cells, chan)) { + /* + * It's in channels_waiting_for_cells, so it shouldn't be in any of + * the other lists. It has waiting cells now, so it goes to + * channels_pending. 
+ */ + smartlist_remove(channels_waiting_for_cells, chan); + smartlist_add(channels_pending, chan); + log_debug(LD_SCHED, + "Channel " U64_FORMAT " at %p went from waiting_for_cells " + "to pending", + U64_PRINTF_ARG(chan->global_identifier), chan); + became_pending = 1; + } else { + /* + * It's not in waiting_for_cells, so it can't become pending; it's + * either not in any of the lists (we add it to waiting_to_write) + * or it's already in waiting_to_write or pending (we do nothing) + */ + if (!(smartlist_contains(channels_waiting_to_write, chan) || + smartlist_contains(channels_pending, chan))) { + smartlist_add(channels_waiting_to_write, chan); + log_debug(LD_SCHED, + "Channel " U64_FORMAT " at %p entered waiting_to_write", + U64_PRINTF_ARG(chan->global_identifier), chan); + } + } + + /* + * If we made a channel pending, we potentially have scheduling work + * to do. + */ + if (became_pending) scheduler_retrigger(); +} + +/** Set up the scheduling system */ + +void +scheduler_init(void) +{ + log_debug(LD_SCHED, "Initting scheduler"); + + tor_assert(!run_sched_ev); + run_sched_ev = tor_event_new(tor_libevent_get_base(), -1, + 0, scheduler_evt_callback, NULL); + + channels_waiting_for_cells = smartlist_new(); + channels_waiting_to_write = smartlist_new(); + channels_pending = smartlist_new(); +} + +/** Check if there's more scheduling work */ + +static int +scheduler_more_work(void) +{ + tor_assert(channels_pending); + + return (smartlist_len(channels_pending) > 0) ? 
1 : 0; +} + +/** Retrigger the scheduler in a way safe to use from the callback */ + +static void +scheduler_retrigger(void) +{ + tor_assert(run_sched_ev); + + if (!evtimer_pending(run_sched_ev, NULL)) { + log_debug(LD_SCHED, "Retriggering scheduler event"); + + event_del(run_sched_ev); + evtimer_add(run_sched_ev, &run_sched_tv); + } +} + +/** Notify the scheduler of a channel being closed */ + +void +scheduler_release_channel(channel_t *chan) +{ + tor_assert(chan); + + tor_assert(channels_waiting_for_cells); + tor_assert(channels_waiting_to_write); + tor_assert(channels_pending); + + smartlist_remove(channels_waiting_for_cells, chan); + smartlist_remove(channels_waiting_to_write, chan); + smartlist_remove(channels_pending, chan); +} + +/** Run the scheduling algorithm if necessary */ + +void +scheduler_run(void) +{ + smartlist_t *tmp = NULL; + + log_debug(LD_SCHED, "We have a chance to run the scheduler"); + + /* + * TODO make this work properly + * + * For now, just empty the pending list and log that we saw stuff in it + */ + + tmp = channels_pending; + channels_pending = smartlist_new(); + + SMARTLIST_FOREACH_BEGIN(tmp, channel_t *, chan) { + log_debug(LD_SCHED, + "Scheduler saw pending channel " U64_FORMAT " at %p", + U64_PRINTF_ARG(chan->global_identifier), chan); + } SMARTLIST_FOREACH_END(chan); + + smartlist_free(tmp); +} + +/** Trigger the scheduling event so we run the scheduler later */ + +static void +scheduler_trigger(void) +{ + log_debug(LD_SCHED, "Triggering scheduler event"); + + tor_assert(run_sched_ev); + + run_sched_tv.tv_sec = 0; + run_sched_tv.tv_usec = 0; + + evtimer_add(run_sched_ev, &run_sched_tv); +} + +/** Mark a channel as ready to accept writes */ + +void +scheduler_channel_wants_writes(channel_t *chan) +{ + int became_pending = 0; + + tor_assert(chan); + tor_assert(channels_waiting_for_cells); + tor_assert(channels_waiting_to_write); + tor_assert(channels_pending); + + /* If it's already in waiting_to_write, we can put it in pending */ 
+ if (smartlist_contains(channels_waiting_to_write, chan)) { + /* + * It's in channels_waiting_to_write, so it shouldn't be in any of + * the other lists. It can write now, so it goes to channels_pending. + */ + smartlist_remove(channels_waiting_to_write, chan); + smartlist_add(channels_pending, chan); + log_debug(LD_SCHED, + "Channel " U64_FORMAT " at %p went from waiting_to_write " + "to pending", + U64_PRINTF_ARG(chan->global_identifier), chan); + became_pending = 1; + } else { + /* + * It's not in waiting_to_write, so it can't become pending; it's + * either not in any of the lists (we add it to waiting_for_cells) + * or it's already in waiting_for_cells or pending (we do nothing) + */ + if (!(smartlist_contains(channels_waiting_for_cells, chan) || + smartlist_contains(channels_pending, chan))) { + smartlist_add(channels_waiting_for_cells, chan); + log_debug(LD_SCHED, + "Channel " U64_FORMAT " at %p entered waiting_for_cells", + U64_PRINTF_ARG(chan->global_identifier), chan); + } + } + + /* + * If we made a channel pending, we potentially have scheduling work + * to do. + */ + if (became_pending) scheduler_retrigger(); +} + diff --git a/src/or/scheduler.h b/src/or/scheduler.h new file mode 100644 index 0000000..b25e36e --- /dev/null +++ b/src/or/scheduler.h @@ -0,0 +1,31 @@ +/* * Copyright (c) 2013, The Tor Project, Inc. 
*/ +/* See LICENSE for licensing information */ + +/** + * \file scheduler.h + * \brief Header file for scheduler.c + **/ + +#ifndef TOR_SCHEDULER_H +#define TOR_SCHEDULER_H + +#include "or.h" +#include "channel.h" + +/* Global-visibility scheduler functions */ + +/* Set up and shut down the scheduler from main.c */ +void scheduler_free_all(void); +void scheduler_init(void); +void scheduler_run(void); + +/* Mark channels as having cells or wanting/not wanting writes */ +void scheduler_channel_doesnt_want_writes(channel_t *chan); +void scheduler_channel_has_waiting_cells(channel_t *chan); +void scheduler_channel_wants_writes(channel_t *chan); + +/* Notify the scheduler of a channel being closed */ +void scheduler_release_channel(channel_t *chan); + +#endif /* !defined(TOR_SCHEDULER_H) */ +
tor-commits@lists.torproject.org