mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-24 12:23:32 +01:00
relay: Add the OOM invocation metrics
With this commit, a relay now emits metrics event on the MetricsPort related to the OOM invocation for: - DNS cache - GeoIP database - Cell queues - HSDir caches Everytime the OOM is invoked, the number of bytes is added to the metrics counter for that specific type of invocation. Related to #40367 Signed-off-by: David Goulet <dgoulet@torproject.org>
This commit is contained in:
parent
bdde4579c2
commit
9c2fa34982
@ -2586,8 +2586,10 @@ conns_compare_by_buffer_age_(const void **a_, const void **b_)
|
|||||||
|
|
||||||
/** We're out of memory for cells, having allocated <b>current_allocation</b>
|
/** We're out of memory for cells, having allocated <b>current_allocation</b>
|
||||||
* bytes' worth. Kill the 'worst' circuits until we're under
|
* bytes' worth. Kill the 'worst' circuits until we're under
|
||||||
* FRACTION_OF_DATA_TO_RETAIN_ON_OOM of our maximum usage. */
|
* FRACTION_OF_DATA_TO_RETAIN_ON_OOM of our maximum usage.
|
||||||
void
|
*
|
||||||
|
* Return the number of bytes removed. */
|
||||||
|
size_t
|
||||||
circuits_handle_oom(size_t current_allocation)
|
circuits_handle_oom(size_t current_allocation)
|
||||||
{
|
{
|
||||||
smartlist_t *circlist;
|
smartlist_t *circlist;
|
||||||
@ -2613,12 +2615,11 @@ circuits_handle_oom(size_t current_allocation)
|
|||||||
tor_zstd_get_total_allocation(),
|
tor_zstd_get_total_allocation(),
|
||||||
tor_lzma_get_total_allocation(),
|
tor_lzma_get_total_allocation(),
|
||||||
hs_cache_get_total_allocation());
|
hs_cache_get_total_allocation());
|
||||||
|
|
||||||
{
|
{
|
||||||
size_t mem_target = (size_t)(get_options()->MaxMemInQueues *
|
size_t mem_target = (size_t)(get_options()->MaxMemInQueues *
|
||||||
FRACTION_OF_DATA_TO_RETAIN_ON_OOM);
|
FRACTION_OF_DATA_TO_RETAIN_ON_OOM);
|
||||||
if (current_allocation <= mem_target)
|
if (current_allocation <= mem_target)
|
||||||
return;
|
return 0;
|
||||||
mem_to_recover = current_allocation - mem_target;
|
mem_to_recover = current_allocation - mem_target;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2697,7 +2698,6 @@ circuits_handle_oom(size_t current_allocation)
|
|||||||
} SMARTLIST_FOREACH_END(circ);
|
} SMARTLIST_FOREACH_END(circ);
|
||||||
|
|
||||||
done_recovering_mem:
|
done_recovering_mem:
|
||||||
|
|
||||||
log_notice(LD_GENERAL, "Removed %"TOR_PRIuSZ" bytes by killing %d circuits; "
|
log_notice(LD_GENERAL, "Removed %"TOR_PRIuSZ" bytes by killing %d circuits; "
|
||||||
"%d circuits remain alive. Also killed %d non-linked directory "
|
"%d circuits remain alive. Also killed %d non-linked directory "
|
||||||
"connections.",
|
"connections.",
|
||||||
@ -2705,6 +2705,8 @@ circuits_handle_oom(size_t current_allocation)
|
|||||||
n_circuits_killed,
|
n_circuits_killed,
|
||||||
smartlist_len(circlist) - n_circuits_killed,
|
smartlist_len(circlist) - n_circuits_killed,
|
||||||
n_dirconns_killed);
|
n_dirconns_killed);
|
||||||
|
|
||||||
|
return mem_recovered;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Verify that circuit <b>c</b> has all of its invariants
|
/** Verify that circuit <b>c</b> has all of its invariants
|
||||||
|
@ -232,7 +232,7 @@ int circuit_count_pending_on_channel(channel_t *chan);
|
|||||||
|
|
||||||
MOCK_DECL(void, assert_circuit_ok,(const circuit_t *c));
|
MOCK_DECL(void, assert_circuit_ok,(const circuit_t *c));
|
||||||
void circuit_free_all(void);
|
void circuit_free_all(void);
|
||||||
void circuits_handle_oom(size_t current_allocation);
|
size_t circuits_handle_oom(size_t current_allocation);
|
||||||
|
|
||||||
void circuit_clear_testing_cell_stats(circuit_t *circ);
|
void circuit_clear_testing_cell_stats(circuit_t *circ);
|
||||||
|
|
||||||
|
@ -2701,11 +2701,18 @@ cell_queues_get_total_allocation(void)
|
|||||||
/** The time at which we were last low on memory. */
|
/** The time at which we were last low on memory. */
|
||||||
static time_t last_time_under_memory_pressure = 0;
|
static time_t last_time_under_memory_pressure = 0;
|
||||||
|
|
||||||
|
/** Statistics on how many bytes were removed by the OOM per type. */
|
||||||
|
uint64_t oom_stats_n_bytes_removed_dns = 0;
|
||||||
|
uint64_t oom_stats_n_bytes_removed_cell = 0;
|
||||||
|
uint64_t oom_stats_n_bytes_removed_geoip = 0;
|
||||||
|
uint64_t oom_stats_n_bytes_removed_hsdir = 0;
|
||||||
|
|
||||||
/** Check whether we've got too much space used for cells. If so,
|
/** Check whether we've got too much space used for cells. If so,
|
||||||
* call the OOM handler and return 1. Otherwise, return 0. */
|
* call the OOM handler and return 1. Otherwise, return 0. */
|
||||||
STATIC int
|
STATIC int
|
||||||
cell_queues_check_size(void)
|
cell_queues_check_size(void)
|
||||||
{
|
{
|
||||||
|
size_t removed = 0;
|
||||||
time_t now = time(NULL);
|
time_t now = time(NULL);
|
||||||
size_t alloc = cell_queues_get_total_allocation();
|
size_t alloc = cell_queues_get_total_allocation();
|
||||||
alloc += half_streams_get_total_allocation();
|
alloc += half_streams_get_total_allocation();
|
||||||
@ -2730,20 +2737,27 @@ cell_queues_check_size(void)
|
|||||||
if (hs_cache_total > get_options()->MaxMemInQueues / 5) {
|
if (hs_cache_total > get_options()->MaxMemInQueues / 5) {
|
||||||
const size_t bytes_to_remove =
|
const size_t bytes_to_remove =
|
||||||
hs_cache_total - (size_t)(get_options()->MaxMemInQueues / 10);
|
hs_cache_total - (size_t)(get_options()->MaxMemInQueues / 10);
|
||||||
alloc -= hs_cache_handle_oom(now, bytes_to_remove);
|
removed = hs_cache_handle_oom(now, bytes_to_remove);
|
||||||
|
oom_stats_n_bytes_removed_hsdir += removed;
|
||||||
|
alloc -= removed;
|
||||||
}
|
}
|
||||||
if (geoip_client_cache_total > get_options()->MaxMemInQueues / 5) {
|
if (geoip_client_cache_total > get_options()->MaxMemInQueues / 5) {
|
||||||
const size_t bytes_to_remove =
|
const size_t bytes_to_remove =
|
||||||
geoip_client_cache_total -
|
geoip_client_cache_total -
|
||||||
(size_t)(get_options()->MaxMemInQueues / 10);
|
(size_t)(get_options()->MaxMemInQueues / 10);
|
||||||
alloc -= geoip_client_cache_handle_oom(now, bytes_to_remove);
|
removed = geoip_client_cache_handle_oom(now, bytes_to_remove);
|
||||||
|
oom_stats_n_bytes_removed_geoip += removed;
|
||||||
|
alloc -= removed;
|
||||||
}
|
}
|
||||||
if (dns_cache_total > get_options()->MaxMemInQueues / 5) {
|
if (dns_cache_total > get_options()->MaxMemInQueues / 5) {
|
||||||
const size_t bytes_to_remove =
|
const size_t bytes_to_remove =
|
||||||
dns_cache_total - (size_t)(get_options()->MaxMemInQueues / 10);
|
dns_cache_total - (size_t)(get_options()->MaxMemInQueues / 10);
|
||||||
alloc -= dns_cache_handle_oom(now, bytes_to_remove);
|
removed = dns_cache_handle_oom(now, bytes_to_remove);
|
||||||
|
oom_stats_n_bytes_removed_dns += removed;
|
||||||
|
alloc -= removed;
|
||||||
}
|
}
|
||||||
circuits_handle_oom(alloc);
|
removed = circuits_handle_oom(alloc);
|
||||||
|
oom_stats_n_bytes_removed_cell += removed;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -49,6 +49,11 @@ extern uint64_t stats_n_data_bytes_packaged;
|
|||||||
extern uint64_t stats_n_data_cells_received;
|
extern uint64_t stats_n_data_cells_received;
|
||||||
extern uint64_t stats_n_data_bytes_received;
|
extern uint64_t stats_n_data_bytes_received;
|
||||||
|
|
||||||
|
extern uint64_t oom_stats_n_bytes_removed_dns;
|
||||||
|
extern uint64_t oom_stats_n_bytes_removed_cell;
|
||||||
|
extern uint64_t oom_stats_n_bytes_removed_geoip;
|
||||||
|
extern uint64_t oom_stats_n_bytes_removed_hsdir;
|
||||||
|
|
||||||
void dump_cell_pool_usage(int severity);
|
void dump_cell_pool_usage(int severity);
|
||||||
size_t packed_cell_mem_cost(void);
|
size_t packed_cell_mem_cost(void);
|
||||||
|
|
||||||
|
@ -10,6 +10,9 @@
|
|||||||
|
|
||||||
#include "orconfig.h"
|
#include "orconfig.h"
|
||||||
|
|
||||||
|
#include "core/or/or.h"
|
||||||
|
#include "core/or/relay.h"
|
||||||
|
|
||||||
#include "lib/malloc/malloc.h"
|
#include "lib/malloc/malloc.h"
|
||||||
#include "lib/container/smartlist.h"
|
#include "lib/container/smartlist.h"
|
||||||
#include "lib/metrics/metrics_store.h"
|
#include "lib/metrics/metrics_store.h"
|
||||||
@ -17,16 +20,78 @@
|
|||||||
|
|
||||||
#include "feature/relay/relay_metrics.h"
|
#include "feature/relay/relay_metrics.h"
|
||||||
|
|
||||||
|
/** Declarations of each fill function for metrics defined in base_metrics. */
|
||||||
|
static void fill_oom_values(void);
|
||||||
|
|
||||||
/** The base metrics that is a static array of metrics added to the metrics
|
/** The base metrics that is a static array of metrics added to the metrics
|
||||||
* store.
|
* store.
|
||||||
*
|
*
|
||||||
* The key member MUST be also the index of the entry in the array. */
|
* The key member MUST be also the index of the entry in the array. */
|
||||||
static const relay_metrics_entry_t base_metrics[] = {};
|
static const relay_metrics_entry_t base_metrics[] =
|
||||||
|
{
|
||||||
|
{
|
||||||
|
.key = RELAY_METRICS_NUM_OOM_BYTES,
|
||||||
|
.type = METRICS_TYPE_COUNTER,
|
||||||
|
.name = METRICS_NAME(relay_load_oom_bytes_total),
|
||||||
|
.help = "Total number of bytes the OOM has freed by subsystem",
|
||||||
|
.fill_fn = fill_oom_values,
|
||||||
|
},
|
||||||
|
};
|
||||||
static const size_t num_base_metrics = ARRAY_LENGTH(base_metrics);
|
static const size_t num_base_metrics = ARRAY_LENGTH(base_metrics);
|
||||||
|
|
||||||
/** The only and single store of all the relay metrics. */
|
/** The only and single store of all the relay metrics. */
|
||||||
static metrics_store_t *the_store;
|
static metrics_store_t *the_store;
|
||||||
|
|
||||||
|
/** Fill function for the RELAY_METRICS_NUM_OOM_BYTES metrics. */
|
||||||
|
static void
|
||||||
|
fill_oom_values(void)
|
||||||
|
{
|
||||||
|
metrics_store_entry_t *sentry;
|
||||||
|
const relay_metrics_entry_t *rentry =
|
||||||
|
&base_metrics[RELAY_METRICS_NUM_OOM_BYTES];
|
||||||
|
|
||||||
|
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
|
||||||
|
rentry->help);
|
||||||
|
metrics_store_entry_add_label(sentry, "subsys=cell");
|
||||||
|
metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_cell);
|
||||||
|
|
||||||
|
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
|
||||||
|
rentry->help);
|
||||||
|
metrics_store_entry_add_label(sentry, "subsys=dns");
|
||||||
|
metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_dns);
|
||||||
|
|
||||||
|
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
|
||||||
|
rentry->help);
|
||||||
|
metrics_store_entry_add_label(sentry, "subsys=geoip");
|
||||||
|
metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_geoip);
|
||||||
|
|
||||||
|
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
|
||||||
|
rentry->help);
|
||||||
|
metrics_store_entry_add_label(sentry, "subsys=hsdir");
|
||||||
|
metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_hsdir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Reset the global store and fill it with all the metrics from base_metrics
|
||||||
|
* and their associated values.
|
||||||
|
*
|
||||||
|
* To pull this off, every metrics has a "fill" function that is called and in
|
||||||
|
* charge of adding the metrics to the store, appropriate labels and finally
|
||||||
|
* updating the value to report. */
|
||||||
|
static void
|
||||||
|
fill_store(void)
|
||||||
|
{
|
||||||
|
/* Reset the current store, we are about to fill it with all the things. */
|
||||||
|
metrics_store_reset(the_store);
|
||||||
|
|
||||||
|
/* Call the fill function for each metrics. */
|
||||||
|
for (size_t i = 0; i < num_base_metrics; i++) {
|
||||||
|
if (BUG(!base_metrics[i].fill_fn)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
base_metrics[i].fill_fn();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Return a list of all the relay metrics stores. This is the
|
/** Return a list of all the relay metrics stores. This is the
|
||||||
* function attached to the .get_metrics() member of the subsys_t. */
|
* function attached to the .get_metrics() member of the subsys_t. */
|
||||||
const smartlist_t *
|
const smartlist_t *
|
||||||
@ -36,6 +101,13 @@ relay_metrics_get_stores(void)
|
|||||||
* simply update it. */
|
* simply update it. */
|
||||||
static smartlist_t *stores_list = NULL;
|
static smartlist_t *stores_list = NULL;
|
||||||
|
|
||||||
|
/* We dynamically fill the store with all the metrics upon a request. The
|
||||||
|
* reason for this is because the exposed metrics of a relay are often
|
||||||
|
* internal counters in the fast path and thus we fetch the value when a
|
||||||
|
* metrics port request arrives instead of keeping a local metrics store of
|
||||||
|
* those values. */
|
||||||
|
fill_store();
|
||||||
|
|
||||||
if (!stores_list) {
|
if (!stores_list) {
|
||||||
stores_list = smartlist_new();
|
stores_list = smartlist_new();
|
||||||
smartlist_add(stores_list, the_store);
|
smartlist_add(stores_list, the_store);
|
||||||
|
@ -12,13 +12,11 @@
|
|||||||
#include "lib/container/smartlist.h"
|
#include "lib/container/smartlist.h"
|
||||||
#include "lib/metrics/metrics_common.h"
|
#include "lib/metrics/metrics_common.h"
|
||||||
|
|
||||||
#ifdef RELAY_METRICS_ENTRY_PRIVATE
|
|
||||||
|
|
||||||
/** Metrics key for each reported metrics. This key is also used as an index in
|
/** Metrics key for each reported metrics. This key is also used as an index in
|
||||||
* the base_metrics array. */
|
* the base_metrics array. */
|
||||||
typedef enum {
|
typedef enum {
|
||||||
/* XXX So code compiles. */
|
/** Number of OOM invocation. */
|
||||||
PLACEHOLDER = 0,
|
RELAY_METRICS_NUM_OOM_BYTES = 0,
|
||||||
} relay_metrics_key_t;
|
} relay_metrics_key_t;
|
||||||
|
|
||||||
/** The metadata of a relay metric. */
|
/** The metadata of a relay metric. */
|
||||||
@ -31,10 +29,10 @@ typedef struct relay_metrics_entry_t {
|
|||||||
const char *name;
|
const char *name;
|
||||||
/* Metrics output help comment. */
|
/* Metrics output help comment. */
|
||||||
const char *help;
|
const char *help;
|
||||||
|
/* Update value function. */
|
||||||
|
void (*fill_fn)(void);
|
||||||
} relay_metrics_entry_t;
|
} relay_metrics_entry_t;
|
||||||
|
|
||||||
#endif /* RELAY_METRICS_ENTRY_PRIVATE */
|
|
||||||
|
|
||||||
/* Init. */
|
/* Init. */
|
||||||
void relay_metrics_init(void);
|
void relay_metrics_init(void);
|
||||||
void relay_metrics_free(void);
|
void relay_metrics_free(void);
|
||||||
|
Loading…
Reference in New Issue
Block a user