Improved circuit queue out-of-memory handler

Previously, when we ran low on memory, we'd close whichever circuits
had the most queued cells. Now, we close those that have the
*oldest* queued cells, on the theory that those are most responsible
for us running low on memory, and that those are the least likely to
actually drain on their own if we wait a little longer.

Based on analysis from a forthcoming paper by Jansen, Tschorsch,
Johnson, and Scheuermann. Fixes bug 9093.
This commit is contained in:
Nick Mathewson 2013-11-07 12:15:30 -05:00
parent 82d8944928
commit 1b8ceb83c9
4 changed files with 64 additions and 12 deletions

7
changes/bug9093 Normal file
View File

@ -0,0 +1,7 @@
o Minor features:
- Improve the circuit queue out-of-memory handler. Previously, when
we ran low on memory, we'd close whichever circuits had the most
queued cells. Now, we close those that have the *oldest* queued
cells, on the theory that those are most responsible for us
running low on memory. Based on analysis from a forthcoming paper
by Jansen, Tschorsch, Johnson, and Scheuermann. Fixes bug 9093.

View File

@ -1383,25 +1383,56 @@ n_cells_in_circ_queues(const circuit_t *c)
return n;
}
/** helper to sort a list of circuit_q by total queue lengths, in descending
* order. */
/**
* Return the age of the oldest cell queued on <b>c</b>, in milliseconds.
* Return 0 if there are no cells queued on c. Requires that <b>now</b> be
* the current time in milliseconds since the epoch, truncated.
*
* This function will return incorrect results if the oldest cell queued on
* the circuit is older than 2**32 msec (about 49 days) old.
*/
static uint32_t
circuit_max_queued_cell_age(const circuit_t *c, uint32_t now)
{
uint32_t age = 0;
if (c->n_conn_cells.head)
age = now - c->n_conn_cells.head->inserted_time;
if (! CIRCUIT_IS_ORIGIN(c)) {
const or_circuit_t *orcirc = TO_OR_CIRCUIT((circuit_t*)c);
if (orcirc->p_conn_cells.head) {
uint32_t age2 = now - orcirc->p_conn_cells.head->inserted_time;
if (age2 > age)
return age2;
}
}
return age;
}
/** Temporary variable for circuits_compare_by_oldest_queued_cell_ This is a
* kludge to work around the fact that qsort doesn't provide a way for
* comparison functions to take an extra argument. */
static uint32_t circcomp_now_tmp;
/** Helper to sort a list of circuit_t by age of oldest cell, in descending
* order. Requires that circcomp_now_tmp is set correctly. */
static int
circuits_compare_by_queue_len_(const void **a_, const void **b_)
circuits_compare_by_oldest_queued_cell_(const void **a_, const void **b_)
{
const circuit_t *a = *a_;
const circuit_t *b = *b_;
size_t a_n = n_cells_in_circ_queues(a);
size_t b_n = n_cells_in_circ_queues(b);
uint32_t age_a = circuit_max_queued_cell_age(a, circcomp_now_tmp);
uint32_t age_b = circuit_max_queued_cell_age(b, circcomp_now_tmp);
if (a_n < b_n)
if (age_a < age_b)
return 1;
else if (a_n == b_n)
else if (age_a == age_b)
return 0;
else
return -1;
}
#define FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM 0.90
#define FRACTION_OF_CELLS_TO_RETAIN_ON_OOM 0.90
/** We're out of memory for cells, having allocated <b>current_allocation</b>
* bytes' worth. Kill the 'worst' circuits until we're under
@ -1414,13 +1445,14 @@ circuits_handle_oom(size_t current_allocation)
circuit_t *circ;
size_t n_cells_removed=0, n_cells_to_remove;
int n_circuits_killed=0;
struct timeval now;
log_notice(LD_GENERAL, "We're low on memory. Killing circuits with "
"over-long queues. (This behavior is controlled by "
"MaxMemInCellQueues.)");
{
size_t mem_target = (size_t)(get_options()->MaxMemInCellQueues *
FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM);
FRACTION_OF_CELLS_TO_RETAIN_ON_OOM);
size_t mem_to_recover;
if (current_allocation <= mem_target)
return;
@ -1433,9 +1465,13 @@ circuits_handle_oom(size_t current_allocation)
for (circ = global_circuitlist; circ; circ = circ->next)
smartlist_add(circlist, circ);
/* Set circcomp_now_tmp so that the sort can work. */
tor_gettimeofday_cached(&now);
circcomp_now_tmp = (uint32_t)tv_to_msec(&now);
/* This is O(n log n); there are faster algorithms we could use instead.
* Let's hope this doesn't happen enough to be in the critical path. */
smartlist_sort(circlist, circuits_compare_by_queue_len_);
smartlist_sort(circlist, circuits_compare_by_oldest_queued_cell_);
/* Okay, now the worst circuits are at the front of the list. Let's mark
* them, and reclaim their storage aggressively. */

View File

@ -912,8 +912,13 @@ typedef struct var_cell_t {
typedef struct packed_cell_t {
struct packed_cell_t *next; /**< Next cell queued on this circuit. */
char body[CELL_NETWORK_SIZE]; /**< Cell as packed for network. */
uint32_t inserted_time; /**< Time (in milliseconds since epoch, with high
* bits truncated) when this cell was inserted. */
} packed_cell_t;
/* XXXX This next structure may be obsoleted by inserted_time in
* packed_cell_t */
/** Number of cells added to a circuit queue including their insertion
* time on 10 millisecond detail; used for buffer statistics. */
typedef struct insertion_time_elem_t {

View File

@ -1904,15 +1904,19 @@ cell_queue_append(cell_queue_t *queue, packed_cell_t *cell)
void
cell_queue_append_packed_copy(cell_queue_t *queue, const cell_t *cell)
{
struct timeval now;
packed_cell_t *copy = packed_cell_copy(cell);
tor_gettimeofday_cached(&now);
copy->inserted_time = (uint32_t)tv_to_msec(&now);
/* Remember the time when this cell was put in the queue. */
/*XXXX This may be obsoleted by inserted_time */
if (get_options()->CellStatistics) {
struct timeval now;
uint32_t added;
insertion_time_queue_t *it_queue = queue->insertion_times;
if (!it_pool)
it_pool = mp_pool_new(sizeof(insertion_time_elem_t), 1024);
tor_gettimeofday_cached(&now);
#define SECONDS_IN_A_DAY 86400L
added = (uint32_t)(((now.tv_sec % SECONDS_IN_A_DAY) * 100L)
+ ((uint32_t)now.tv_usec / (uint32_t)10000L));