commit 2e1fe1fcf93c2a77805048bea5c535ca4456d583
Author: Nick Mathewson <nickm(a)torproject.org>
Date: Sun Jun 16 09:55:44 2013 -0400
Implement a real OOM-killer for too-long circuit queues.
This implements "algorithm 1" from my discussion of bug #9072: on OOM,
find the circuits with the longest queues, and kill them. It's also a
fix for #9063 -- without the side-effects of bug #9072.
The memory bounds aren't perfect here, and you need to be sure to
allow some slack for the rest of Tor's usage.
This isn't a perfect fix; the rest of the solutions I describe on
codeable.
---
changes/bug9063_redux | 15 ++++++++
doc/tor.1.txt | 9 +++++
src/common/mempool.h | 2 +
src/or/circuitlist.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++
src/or/circuitlist.h | 1 +
src/or/config.c | 7 ++++
src/or/or.h | 4 ++
src/or/relay.c | 33 +++++++++++++++-
src/or/relay.h | 1 +
9 files changed, 172 insertions(+), 2 deletions(-)
diff --git a/changes/bug9063_redux b/changes/bug9063_redux
new file mode 100644
index 0000000..e6fae72
--- /dev/null
+++ b/changes/bug9063_redux
@@ -0,0 +1,15 @@
+ o Major bugfixes:
+ - When we have too much memory queued in circuits (according to a new
+ MaxMemInCellQueues option), close the circuits consuming the most
+ memory. This prevents us from running out of memory as a relay if
+ circuits fill up faster than they can be drained. Fixes
+ bug 9063; bugfix on the 54th commit of Tor. This bug is a further
+ fix beyond bug 6252, whose fix was merged into 0.2.3.21-rc.
+
+ Also fixes an earlier approach taken in 0.2.4.13-alpha, where we
+ tried to solve this issue simply by imposing an upper limit on the
+ number of queued cells for a single circuit. That approach proved to
+ be problematic, since there are ways to provoke clients to send a
+ number of cells in excess of any such reasonable limit.
+ Fixes bug 9072; bugfix on 0.2.4.13-alpha.
+
diff --git a/doc/tor.1.txt b/doc/tor.1.txt
index 773fccf..1f3afef 100644
--- a/doc/tor.1.txt
+++ b/doc/tor.1.txt
@@ -1475,6 +1475,15 @@ is non-zero):
localhost, RFC1918 addresses, and so on. This can create security issues;
you should probably leave it off. (Default: 0)
+**MaxMemInCellQueues** __N__ **bytes**|**KB**|**MB**|**GB**::
+ This option configures a threshold above which Tor will assume that it
+ needs to stop queueing cells because it's about to run out of memory.
+ If it hits this threshold, it will begin killing circuits until it
+ has recovered at least 10% of this memory. Do not set this option too
+ low, or your relay may be unreliable under load. This option only
+ effects circuit queues, so the actual process size will be larger than
+ this. (Default: 8GB)
+
DIRECTORY SERVER OPTIONS
------------------------
diff --git a/src/common/mempool.h b/src/common/mempool.h
index d0a7bc2..bc424ac 100644
--- a/src/common/mempool.h
+++ b/src/common/mempool.h
@@ -22,6 +22,8 @@ void mp_pool_destroy(mp_pool_t *pool);
void mp_pool_assert_ok(mp_pool_t *pool);
void mp_pool_log_status(mp_pool_t *pool, int severity);
+#define MP_POOL_ITEM_OVERHEAD (sizeof(void*))
+
#define MEMPOOL_STATS
#ifdef MEMPOOL_PRIVATE
diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c
index 93ba69d..d9ea4d1 100644
--- a/src/or/circuitlist.c
+++ b/src/or/circuitlist.c
@@ -1359,6 +1359,108 @@ _circuit_mark_for_close(circuit_t *circ, int reason, int line,
}
}
+/** Given a marked circuit <b>circ</b>, aggressively free its cell queues to
+ * recover memory. */
+static void
+marked_circuit_free_cells(circuit_t *circ)
+{
+ if (!circ->marked_for_close) {
+ log_warn(LD_BUG, "Called on non-marked circuit");
+ return;
+ }
+ cell_queue_clear(&circ->n_conn_cells);
+ if (! CIRCUIT_IS_ORIGIN(circ))
+ cell_queue_clear(& TO_OR_CIRCUIT(circ)->p_conn_cells);
+}
+
+/** Return the number of cells used by the circuit <b>c</b>'s cell queues. */
+static size_t
+n_cells_in_circ_queues(const circuit_t *c)
+{
+ size_t n = c->n_conn_cells.n;
+ if (! CIRCUIT_IS_ORIGIN(c))
+ n += TO_OR_CIRCUIT((circuit_t*)c)->p_conn_cells.n;
+ return n;
+}
+
+/** helper to sort a list of circuit_q by total queue lengths, in descending
+ * order. */
+static int
+circuits_compare_by_queue_len_(const void **a_, const void **b_)
+{
+ const circuit_t *a = *a_;
+ const circuit_t *b = *b_;
+ size_t a_n = n_cells_in_circ_queues(a);
+ size_t b_n = n_cells_in_circ_queues(b);
+
+ if (a_n < b_n)
+ return 1;
+ else if (a_n == b_n)
+ return 0;
+ else
+ return -1;
+}
+
+#define FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM 0.90
+
+/** We're out of memory for cells, having allocated <b>current_allocation</b>
+ * bytes' worth. Kill the 'worst' circuits until we're under
+ * FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM of our maximum usage. */
+void
+circuits_handle_oom(size_t current_allocation)
+{
+ /* Let's hope there's enough slack space for this allocation here... */
+ smartlist_t *circlist = smartlist_new();
+ circuit_t *circ;
+ size_t n_cells_removed=0, n_cells_to_remove;
+ int n_circuits_killed=0;
+ log_notice(LD_GENERAL, "We're low on memory. Killing circuits with "
+ "over-long queues. (This behavior is controlled by "
+ "MaxMemInCellQueues.)");
+
+ {
+ size_t mem_target = (size_t)(get_options()->MaxMemInCellQueues *
+ FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM);
+ size_t mem_to_recover;
+ if (current_allocation <= mem_target)
+ return;
+ mem_to_recover = current_allocation - mem_target;
+ n_cells_to_remove = CEIL_DIV(mem_to_recover, packed_cell_mem_cost());
+ }
+
+ /* This algorithm itself assumes that you've got enough memory slack
+ * to actually run it. */
+ for (circ = global_circuitlist; circ; circ = circ->next)
+ smartlist_add(circlist, circ);
+
+ /* This is O(n log n); there are faster algorithms we could use instead.
+ * Let's hope this doesn't happen enough to be in the critical path. */
+ smartlist_sort(circlist, circuits_compare_by_queue_len_);
+
+ /* Okay, now the worst circuits are at the front of the list. Let's mark
+ * them, and reclaim their storage aggressively. */
+ SMARTLIST_FOREACH_BEGIN(circlist, circuit_t *, circ) {
+ size_t n = n_cells_in_circ_queues(circ);
+ if (! circ->marked_for_close) {
+ circuit_mark_for_close(circ, END_CIRC_REASON_RESOURCELIMIT);
+ }
+ marked_circuit_free_cells(circ);
+
+ ++n_circuits_killed;
+ n_cells_removed += n;
+ if (n_cells_removed >= n_cells_to_remove)
+ break;
+ } SMARTLIST_FOREACH_END(circ);
+
+ clean_cell_pool(); /* In case this helps. */
+
+ log_notice(LD_GENERAL, "Removed "U64_FORMAT" bytes by killing %d circuits.",
+ U64_PRINTF_ARG(n_cells_removed * packed_cell_mem_cost()),
+ n_circuits_killed);
+
+ smartlist_free(circlist);
+}
+
/** Verify that cpath layer <b>cp</b> has all of its invariants
* correct. Trigger an assert if anything is invalid.
*/
diff --git a/src/or/circuitlist.h b/src/or/circuitlist.h
index 6e77354..44f0c1f 100644
--- a/src/or/circuitlist.h
+++ b/src/or/circuitlist.h
@@ -57,6 +57,7 @@ int circuit_count_pending_on_or_conn(or_connection_t *or_conn);
void assert_cpath_layer_ok(const crypt_path_t *cp);
void assert_circuit_ok(const circuit_t *c);
void circuit_free_all(void);
+void circuits_handle_oom(size_t current_allocation);
#endif
diff --git a/src/or/config.c b/src/or/config.c
index 90a5dfb..d5c5689 100644
--- a/src/or/config.c
+++ b/src/or/config.c
@@ -343,6 +343,7 @@ static config_var_t _option_vars[] = {
V(MaxAdvertisedBandwidth, MEMUNIT, "1 GB"),
V(MaxCircuitDirtiness, INTERVAL, "10 minutes"),
V(MaxClientCircuitsPending, UINT, "32"),
+ V(MaxMemInCellQueues, MEMUNIT, "8 GB"),
V(MaxOnionsPending, UINT, "100"),
OBSOLETE("MonthlyAccountingStart"),
V(MyFamily, STRING, NULL),
@@ -3668,6 +3669,12 @@ options_validate(or_options_t *old_options, or_options_t *options,
log_warn(LD_CONFIG, "EntryNodes is set, but UseEntryGuards is disabled. "
"EntryNodes will be ignored.");
+ if (options->MaxMemInCellQueues < (500 << 20)) {
+ log_warn(LD_CONFIG, "MaxMemInCellQueues must be at least 500 MB for now. "
+ "Ideally, have it as large as you can afford.");
+ options->MaxMemInCellQueues = (500 << 20);
+ }
+
options->_AllowInvalid = 0;
if (options->AllowInvalidNodes) {
SMARTLIST_FOREACH_BEGIN(options->AllowInvalidNodes, const char *, cp) {
diff --git a/src/or/or.h b/src/or/or.h
index 83bf8c8..dd95c34 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3064,6 +3064,10 @@ typedef struct {
config_line_t *DirPort_lines;
config_line_t *DNSPort_lines; /**< Ports to listen on for DNS requests. */
+ uint64_t MaxMemInCellQueues; /**< If we have more memory than this allocated
+ * for circuit cell queues, run the OOM handler
+ */
+
/** @name port booleans
*
* Derived booleans: True iff there is a non-listener port on an AF_INET or
diff --git a/src/or/relay.c b/src/or/relay.c
index fdb4bff..fda9e89 100644
--- a/src/or/relay.c
+++ b/src/or/relay.c
@@ -1799,7 +1799,7 @@ circuit_consider_sending_sendme(circuit_t *circ, crypt_path_t *layer_hint)
#endif
/** The total number of cells we have allocated from the memory pool. */
-static int total_cells_allocated = 0;
+static size_t total_cells_allocated = 0;
/** A memory pool to allocate packed_cell_t objects. */
static mp_pool_t *cell_pool = NULL;
@@ -1871,7 +1871,7 @@ dump_cell_pool_usage(int severity)
++n_circs;
}
log(severity, LD_MM, "%d cells allocated on %d circuits. %d cells leaked.",
- n_cells, n_circs, total_cells_allocated - n_cells);
+ n_cells, n_circs, (int)total_cells_allocated - n_cells);
mp_pool_log_status(cell_pool, severity);
}
@@ -1978,6 +1978,29 @@ cell_queue_pop(cell_queue_t *queue)
return cell;
}
+/** Return the total number of bytes used for each packed_cell in a queue.
+ * Approximate. */
+size_t
+packed_cell_mem_cost(void)
+{
+ return sizeof(packed_cell_t) + MP_POOL_ITEM_OVERHEAD +
+ get_options()->CellStatistics ?
+ (sizeof(insertion_time_elem_t)+MP_POOL_ITEM_OVERHEAD) : 0;
+}
+
+/** Check whether we've got too much space used for cells. If so,
+ * call the OOM handler and return 1. Otherwise, return 0. */
+static int
+cell_queues_check_size(void)
+{
+ size_t alloc = total_cells_allocated * packed_cell_mem_cost();
+ if (alloc >= get_options()->MaxMemInCellQueues) {
+ circuits_handle_oom(alloc);
+ return 1;
+ }
+ return 0;
+}
+
/** Return a pointer to the "next_active_on_{n,p}_conn" pointer of <b>circ</b>,
* depending on whether <b>conn</b> matches n_conn or p_conn. */
static INLINE circuit_t **
@@ -2574,6 +2597,12 @@ append_cell_to_circuit_queue(circuit_t *circ, or_connection_t *orconn,
cell_queue_append_packed_copy(queue, cell);
+ if (PREDICT_UNLIKELY(cell_queues_check_size())) {
+ /* We ran the OOM handler */
+ if (circ->marked_for_close)
+ return;
+ }
+
/* If we have too many cells on the circuit, we should stop reading from
* the edge streams for a while. */
if (!streams_blocked && queue->n >= CELL_QUEUE_HIGHWATER_SIZE)
diff --git a/src/or/relay.h b/src/or/relay.h
index 41675e2..c55813b 100644
--- a/src/or/relay.h
+++ b/src/or/relay.h
@@ -40,6 +40,7 @@ void init_cell_pool(void);
void free_cell_pool(void);
void clean_cell_pool(void);
void dump_cell_pool_usage(int severity);
+size_t packed_cell_mem_cost(void);
void cell_queue_clear(cell_queue_t *queue);
void cell_queue_append(cell_queue_t *queue, packed_cell_t *cell);