mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-30 15:43:32 +01:00
Implement backbone of overload statistics.
- Implement overload statistics structure. - Implement function that keeps track of overload statistics. - Implement function that writes overload statistics to descriptor. - Unittest for the whole logic.
This commit is contained in:
parent
80b6054bb0
commit
0a5ecb3342
6
changes/bug40222
Normal file
6
changes/bug40222
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
o Major features (metrics):
|
||||||
|
- Introduce a new class of extra-info statistics that allows relays to
|
||||||
|
expose details of how overloaded they are. This information is controlled
|
||||||
|
using the OverloadStatistics torrc option, and it will be used to make
|
||||||
|
more informed decisions about the network's load balancing. Implements
|
||||||
|
proposal 328; closes ticket 40222.
|
@ -352,6 +352,7 @@ static const config_var_t option_vars_[] = {
|
|||||||
V(CacheDirectoryGroupReadable, AUTOBOOL, "auto"),
|
V(CacheDirectoryGroupReadable, AUTOBOOL, "auto"),
|
||||||
V(CellStatistics, BOOL, "0"),
|
V(CellStatistics, BOOL, "0"),
|
||||||
V(PaddingStatistics, BOOL, "1"),
|
V(PaddingStatistics, BOOL, "1"),
|
||||||
|
V(OverloadStatistics, BOOL, "1"),
|
||||||
V(LearnCircuitBuildTimeout, BOOL, "1"),
|
V(LearnCircuitBuildTimeout, BOOL, "1"),
|
||||||
V(CircuitBuildTimeout, INTERVAL, "0"),
|
V(CircuitBuildTimeout, INTERVAL, "0"),
|
||||||
OBSOLETE("CircuitIdleTimeout"),
|
OBSOLETE("CircuitIdleTimeout"),
|
||||||
|
@ -674,6 +674,9 @@ struct or_options_t {
|
|||||||
/** If true, include statistics file contents in extra-info documents. */
|
/** If true, include statistics file contents in extra-info documents. */
|
||||||
int ExtraInfoStatistics;
|
int ExtraInfoStatistics;
|
||||||
|
|
||||||
|
/** If true, include overload statistics in extra-info documents. */
|
||||||
|
int OverloadStatistics;
|
||||||
|
|
||||||
/** If true, do not believe anybody who tells us that a domain resolves
|
/** If true, do not believe anybody who tells us that a domain resolves
|
||||||
* to an internal address, or that an internal address has a PTR mapping.
|
* to an internal address, or that an internal address has a PTR mapping.
|
||||||
* Helps avoid some cross-site attacks. */
|
* Helps avoid some cross-site attacks. */
|
||||||
|
@ -3372,6 +3372,12 @@ extrainfo_dump_to_string_stats_helper(smartlist_t *chunks,
|
|||||||
if (contents)
|
if (contents)
|
||||||
smartlist_add(chunks, contents);
|
smartlist_add(chunks, contents);
|
||||||
}
|
}
|
||||||
|
if (options->OverloadStatistics) {
|
||||||
|
contents = rep_hist_get_overload_stats_lines();
|
||||||
|
if (contents) {
|
||||||
|
smartlist_add(chunks, contents);
|
||||||
|
}
|
||||||
|
}
|
||||||
/* bridge statistics */
|
/* bridge statistics */
|
||||||
if (should_record_bridge_info(options)) {
|
if (should_record_bridge_info(options)) {
|
||||||
const char *bridge_stats = geoip_get_bridge_stats_extrainfo(now);
|
const char *bridge_stats = geoip_get_bridge_stats_extrainfo(now);
|
||||||
|
@ -183,6 +183,122 @@ static time_t started_tracking_stability = 0;
|
|||||||
/** Map from hex OR identity digest to or_history_t. */
|
/** Map from hex OR identity digest to or_history_t. */
|
||||||
static digestmap_t *history_map = NULL;
|
static digestmap_t *history_map = NULL;
|
||||||
|
|
||||||
|
/** Represents a state of overload stats.
|
||||||
|
*
|
||||||
|
* All the timestamps in this structure have already been rounded down to the
|
||||||
|
* nearest hour. */
|
||||||
|
typedef struct {
|
||||||
|
/* When did we last experience a general overload? */
|
||||||
|
time_t overload_general_time;
|
||||||
|
|
||||||
|
/* When did we last experience a bandwidth-related overload? */
|
||||||
|
time_t overload_ratelimits_time;
|
||||||
|
/* How many times have we gone off the our read limits? */
|
||||||
|
uint64_t overload_read_count;
|
||||||
|
/* How many times have we gone off the our write limits? */
|
||||||
|
uint64_t overload_write_count;
|
||||||
|
|
||||||
|
/* When did we last experience a file descriptor exhaustion? */
|
||||||
|
time_t overload_fd_exhausted_time;
|
||||||
|
/* How many times have we experienced a file descriptor exhaustion? */
|
||||||
|
uint64_t overload_fd_exhausted;
|
||||||
|
} overload_stats_t;
|
||||||
|
|
||||||
|
/** Current state of overload stats */
|
||||||
|
static overload_stats_t overload_stats;
|
||||||
|
|
||||||
|
/** Return true if this overload happened within the last `n_hours`. */
|
||||||
|
static bool
|
||||||
|
overload_happened_recently(time_t overload_time, unsigned n_hours)
|
||||||
|
{
|
||||||
|
/* An overload is relevant if it happened in the last 72 hours */
|
||||||
|
if (overload_time > approx_time() - 3600 * n_hours) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The current version of the overload stats version */
|
||||||
|
#define OVERLOAD_STATS_VERSION 1
|
||||||
|
|
||||||
|
/** Returns an allocated string for extra-info documents for publishing
|
||||||
|
* overload statistics. */
|
||||||
|
char *
|
||||||
|
rep_hist_get_overload_stats_lines(void)
|
||||||
|
{
|
||||||
|
char *result = NULL;
|
||||||
|
smartlist_t *chunks = smartlist_new();
|
||||||
|
char tbuf[ISO_TIME_LEN+1];
|
||||||
|
|
||||||
|
/* First encode the general overload */
|
||||||
|
if (overload_happened_recently(overload_stats.overload_general_time, 72)) {
|
||||||
|
format_iso_time(tbuf, overload_stats.overload_general_time);
|
||||||
|
smartlist_add_asprintf(chunks, "overload-general %d %s",
|
||||||
|
OVERLOAD_STATS_VERSION, tbuf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now do bandwidth-related overloads */
|
||||||
|
if (overload_happened_recently(overload_stats.overload_ratelimits_time,24)) {
|
||||||
|
const or_options_t *options = get_options();
|
||||||
|
format_iso_time(tbuf, overload_stats.overload_ratelimits_time);
|
||||||
|
smartlist_add_asprintf(chunks,
|
||||||
|
"overload-ratelimits %d %s %" PRIu64 " %" PRIu64
|
||||||
|
" %" PRIu64 " %" PRIu64,
|
||||||
|
OVERLOAD_STATS_VERSION, tbuf,
|
||||||
|
options->BandwidthRate, options->BandwidthBurst,
|
||||||
|
overload_stats.overload_read_count,
|
||||||
|
overload_stats.overload_write_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finally file descriptor overloads */
|
||||||
|
if (overload_happened_recently(
|
||||||
|
overload_stats.overload_fd_exhausted_time, 72)) {
|
||||||
|
format_iso_time(tbuf, overload_stats.overload_fd_exhausted_time);
|
||||||
|
smartlist_add_asprintf(chunks, "overload-fd-exhausted %d %s",
|
||||||
|
OVERLOAD_STATS_VERSION, tbuf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Bail early if we had nothing to write */
|
||||||
|
if (smartlist_len(chunks) == 0) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = smartlist_join_strings(chunks, "\n", 0, NULL);
|
||||||
|
|
||||||
|
done:
|
||||||
|
SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp));
|
||||||
|
smartlist_free(chunks);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Round down the time in `a` to the beginning of the current hour */
|
||||||
|
#define SET_TO_START_OF_HOUR(a) STMT_BEGIN \
|
||||||
|
(a) = approx_time() - (approx_time() % 3600); \
|
||||||
|
STMT_END
|
||||||
|
|
||||||
|
/** Note down an overload event of type `overload`. */
|
||||||
|
void
|
||||||
|
rep_hist_note_overload(overload_type_t overload)
|
||||||
|
{
|
||||||
|
switch (overload) {
|
||||||
|
case OVERLOAD_GENERAL:
|
||||||
|
SET_TO_START_OF_HOUR(overload_stats.overload_general_time);
|
||||||
|
break;
|
||||||
|
case OVERLOAD_READ:
|
||||||
|
SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
|
||||||
|
overload_stats.overload_read_count++;
|
||||||
|
break;
|
||||||
|
case OVERLOAD_WRITE:
|
||||||
|
SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
|
||||||
|
overload_stats.overload_write_count++;
|
||||||
|
break;
|
||||||
|
case OVERLOAD_FD_EXHAUSTED:
|
||||||
|
SET_TO_START_OF_HOUR(overload_stats.overload_fd_exhausted_time);
|
||||||
|
overload_stats.overload_fd_exhausted++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Return the or_history_t for the OR with identity digest <b>id</b>,
|
/** Return the or_history_t for the OR with identity digest <b>id</b>,
|
||||||
* creating it if necessary. */
|
* creating it if necessary. */
|
||||||
static or_history_t *
|
static or_history_t *
|
||||||
|
@ -140,6 +140,24 @@ void rep_hist_reset_padding_counts(void);
|
|||||||
void rep_hist_prep_published_padding_counts(time_t now);
|
void rep_hist_prep_published_padding_counts(time_t now);
|
||||||
void rep_hist_padding_count_timers(uint64_t num_timers);
|
void rep_hist_padding_count_timers(uint64_t num_timers);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents the various types of overload we keep track of and expose in our
|
||||||
|
* extra-info descriptor.
|
||||||
|
*/
|
||||||
|
typedef enum {
|
||||||
|
/* A general overload -- can have many different causes. */
|
||||||
|
OVERLOAD_GENERAL,
|
||||||
|
/* We went over our configured read rate/burst bandwidth limit */
|
||||||
|
OVERLOAD_READ,
|
||||||
|
/* We went over our configured write rate/burst bandwidth limit */
|
||||||
|
OVERLOAD_WRITE,
|
||||||
|
/* We exhausted the file descriptors in this system */
|
||||||
|
OVERLOAD_FD_EXHAUSTED,
|
||||||
|
} overload_type_t;
|
||||||
|
|
||||||
|
void rep_hist_note_overload(overload_type_t overload);
|
||||||
|
char *rep_hist_get_overload_stats_lines(void);
|
||||||
|
|
||||||
#ifdef TOR_UNIT_TESTS
|
#ifdef TOR_UNIT_TESTS
|
||||||
struct hs_v2_stats_t;
|
struct hs_v2_stats_t;
|
||||||
const struct hs_v2_stats_t *rep_hist_get_hs_v2_stats(void);
|
const struct hs_v2_stats_t *rep_hist_get_hs_v2_stats(void);
|
||||||
|
@ -703,6 +703,114 @@ test_load_stats_file(void *arg)
|
|||||||
tor_free(content);
|
tor_free(content);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Test the overload stats logic. */
|
||||||
|
static void
|
||||||
|
test_overload_stats(void *arg)
|
||||||
|
{
|
||||||
|
time_t current_time = 1010101010;
|
||||||
|
char *stats_str = NULL;
|
||||||
|
(void) arg;
|
||||||
|
|
||||||
|
/* Change time to 03-01-2002 23:36 UTC */
|
||||||
|
/* This should make the extrainfo timestamp be "2002-01-03 23:00:00" */
|
||||||
|
update_approx_time(current_time);
|
||||||
|
|
||||||
|
/* With an empty rephist we shouldn't get anything back */
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_assert(!stats_str);
|
||||||
|
|
||||||
|
/* Note a DNS overload */
|
||||||
|
rep_hist_note_overload(OVERLOAD_GENERAL);
|
||||||
|
|
||||||
|
/* Move the time forward one hour */
|
||||||
|
current_time += 3600;
|
||||||
|
update_approx_time(current_time);
|
||||||
|
|
||||||
|
/* Now check the string */
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_str_op("overload-general 1 2002-01-03 23:00:00", OP_EQ, stats_str);
|
||||||
|
tor_free(stats_str);
|
||||||
|
|
||||||
|
/* Move the time forward 72 hours: see that the line has disappeared. */
|
||||||
|
current_time += 3600*72;
|
||||||
|
update_approx_time(current_time);
|
||||||
|
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_assert(!stats_str);
|
||||||
|
|
||||||
|
/* Now the time should be 2002-01-07 00:00:00 */
|
||||||
|
|
||||||
|
/* Note a DNS overload */
|
||||||
|
rep_hist_note_overload(OVERLOAD_GENERAL);
|
||||||
|
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_str_op("overload-general 1 2002-01-07 00:00:00", OP_EQ, stats_str);
|
||||||
|
tor_free(stats_str);
|
||||||
|
|
||||||
|
/* Also note an fd exhaustion event */
|
||||||
|
rep_hist_note_overload(OVERLOAD_FD_EXHAUSTED);
|
||||||
|
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_str_op("overload-general 1 2002-01-07 00:00:00\n"
|
||||||
|
"overload-fd-exhausted 1 2002-01-07 00:00:00", OP_EQ, stats_str);
|
||||||
|
tor_free(stats_str);
|
||||||
|
|
||||||
|
/* Move the time forward. Register DNS overload. See that the time changed */
|
||||||
|
current_time += 3600*2;
|
||||||
|
update_approx_time(current_time);
|
||||||
|
|
||||||
|
rep_hist_note_overload(OVERLOAD_GENERAL);
|
||||||
|
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_str_op("overload-general 1 2002-01-07 02:00:00\n"
|
||||||
|
"overload-fd-exhausted 1 2002-01-07 00:00:00", OP_EQ, stats_str);
|
||||||
|
tor_free(stats_str);
|
||||||
|
|
||||||
|
/* Move the time forward. Register a bandwidth ratelimit event. See that the
|
||||||
|
string is added */
|
||||||
|
current_time += 3600*2;
|
||||||
|
update_approx_time(current_time);
|
||||||
|
|
||||||
|
/* Register the rate limit event */
|
||||||
|
rep_hist_note_overload(OVERLOAD_READ);
|
||||||
|
/* Also set some rate limiting values that should be reflected on the log */
|
||||||
|
get_options_mutable()->BandwidthRate = 1000;
|
||||||
|
get_options_mutable()->BandwidthBurst = 2000;
|
||||||
|
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_str_op("overload-general 1 2002-01-07 02:00:00\n"
|
||||||
|
"overload-ratelimits 1 2002-01-07 04:00:00 1000 2000 1 0\n"
|
||||||
|
"overload-fd-exhausted 1 2002-01-07 00:00:00", OP_EQ, stats_str);
|
||||||
|
tor_free(stats_str);
|
||||||
|
|
||||||
|
/* Move the time forward 24 hours: no rate limit line anymore. */
|
||||||
|
current_time += 3600*24;
|
||||||
|
update_approx_time(current_time);
|
||||||
|
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_str_op("overload-general 1 2002-01-07 02:00:00\n"
|
||||||
|
"overload-fd-exhausted 1 2002-01-07 00:00:00", OP_EQ, stats_str);
|
||||||
|
tor_free(stats_str);
|
||||||
|
|
||||||
|
/* Move the time forward 44 hours: no fd exhausted line anymore. */
|
||||||
|
current_time += 3600*44;
|
||||||
|
update_approx_time(current_time);
|
||||||
|
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_str_op("overload-general 1 2002-01-07 02:00:00", OP_EQ, stats_str);
|
||||||
|
tor_free(stats_str);
|
||||||
|
|
||||||
|
/* Move the time forward 2 hours: there is nothing left. */
|
||||||
|
current_time += 3600*2;
|
||||||
|
update_approx_time(current_time);
|
||||||
|
|
||||||
|
stats_str = rep_hist_get_overload_stats_lines();
|
||||||
|
tt_assert(!stats_str);
|
||||||
|
|
||||||
|
done:
|
||||||
|
tor_free(stats_str);
|
||||||
|
}
|
||||||
|
|
||||||
#define ENT(name) \
|
#define ENT(name) \
|
||||||
{ #name, test_ ## name , 0, NULL, NULL }
|
{ #name, test_ ## name , 0, NULL, NULL }
|
||||||
#define FORK(name) \
|
#define FORK(name) \
|
||||||
@ -718,6 +826,7 @@ struct testcase_t stats_tests[] = {
|
|||||||
FORK(get_bandwidth_lines),
|
FORK(get_bandwidth_lines),
|
||||||
FORK(rephist_v3_onions),
|
FORK(rephist_v3_onions),
|
||||||
FORK(load_stats_file),
|
FORK(load_stats_file),
|
||||||
|
FORK(overload_stats),
|
||||||
|
|
||||||
END_OF_TESTCASES
|
END_OF_TESTCASES
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user