Prop#329 OOM: Handle freeing conflux queues on OOM

We use the oldest-circ-first method here, since that seems good for conflux:
queues could briefly spike, but the bad case is if they are maliciously
bloated to stick around for a long time.

The tradeoff here is that it is possible to kill old circuits on a relay
quickly, but that has always been the case with this algorithm choice.

Signed-off-by: David Goulet <dgoulet@torproject.org>
This commit is contained in:
David Goulet 2023-03-03 14:28:18 -05:00 committed by Mike Perry
parent e0881a669a
commit b999051e44
3 changed files with 50 additions and 0 deletions

View File

@ -2739,6 +2739,7 @@ circuits_handle_oom(size_t current_allocation)
mem_recovered += n * packed_cell_mem_cost();
mem_recovered += half_stream_alloc;
mem_recovered += freed;
mem_recovered += conflux_get_circ_bytes_allocation(circ);
if (mem_recovered >= mem_to_recover)
goto done_recovering_mem;

View File

@ -32,6 +32,10 @@
static inline uint64_t cwnd_sendable(const circuit_t *on_circ,
uint64_t in_usec, uint64_t our_usec);
/* Total number of bytes used by all conflux out-of-order queues (ooo_q).
 * conflux_get_total_bytes_allocation() returns this value so that the OOM
 * handler can add it to its memory accounting, and conflux_handle_oom() logs
 * it.
 * NOTE(review): no code that updates this counter is visible in this chunk;
 * confirm where it is incremented and decremented. */
static uint64_t total_ooo_q_bytes = 0;
/**
* Determine if we should multiplex a specific relay command or not.
*
@ -156,6 +160,41 @@ conflux_get_max_seq_recv(const conflux_t *cfx)
return max_seq_recv;
}
/** Report how many bytes of conflux memory are attributed to <b>circ</b>.
 *
 * The figure is the number of entries in the out-of-order queue of the
 * conflux set attached to the circuit, multiplied by the size of a
 * conflux_cell_t. A circuit with no conflux set attached yields 0. */
uint64_t
conflux_get_circ_bytes_allocation(const circuit_t *circ)
{
  if (!circ->conflux) {
    /* Not a conflux circuit: nothing to account for. */
    return 0;
  }

  return smartlist_len(circ->conflux->ooo_q) * sizeof(conflux_cell_t);
}
/** Report the number of bytes currently accounted to the conflux subsystem.
 *
 * For now the only thing counted is the out-of-order queues, through the
 * file-level total_ooo_q_bytes counter. */
uint64_t
conflux_get_total_bytes_allocation(void)
{
  const uint64_t ooo_q_total = total_ooo_q_bytes;

  return ooo_q_total;
}
/** Hook called by the OOM handler, which is asking us to try to free at
 * least <b>bytes_to_remove</b> bytes.
 *
 * Nothing is freed here: <b>bytes_to_remove</b> is ignored and the function
 * only logs the current out-of-order queue allocation.
 *
 * Return the number of bytes freed, which is therefore always 0. */
size_t
conflux_handle_oom(size_t bytes_to_remove)
{
  (void) bytes_to_remove;

  /* We are not doing anything on the sets, the OOM handler will trigger a
   * circuit clean up which will affect conflux sets, by pruning oldest
   * circuits. */
  log_info(LD_CIRC, "OOM handler triggered. OOO queues allocation: %" PRIu64,
           total_ooo_q_bytes);
  return 0;
}
/**
* Returns true if a circuit has package window space to send, and is
* not blocked locally.

View File

@ -2879,6 +2879,8 @@ cell_queues_check_size(void)
alloc += geoip_client_cache_total;
const size_t dns_cache_total = dns_cache_total_allocation();
alloc += dns_cache_total;
const size_t conflux_total = conflux_get_total_bytes_allocation();
alloc += conflux_total;
if (alloc >= get_options()->MaxMemInQueues_low_threshold) {
last_time_under_memory_pressure = approx_time();
if (alloc >= get_options()->MaxMemInQueues) {
@ -2910,6 +2912,14 @@ cell_queues_check_size(void)
oom_stats_n_bytes_removed_dns += removed;
alloc -= removed;
}
/* Like onion service above, try to go down to 10% if we are above 20% */
if (conflux_total > get_options()->MaxMemInQueues / 5) {
const size_t bytes_to_remove =
conflux_total - (size_t)(get_options()->MaxMemInQueues / 10);
removed = conflux_handle_oom(bytes_to_remove);
oom_stats_n_bytes_removed_cell += removed;
alloc -= removed;
}
removed = circuits_handle_oom(alloc);
oom_stats_n_bytes_removed_cell += removed;
return 1;