rephist: Introduce a fraction and period for overload onionskin

This code was heavily reused from the previous DNS timeout work done in
ticket #40491 that was removed afterall from our code.

Closes #40560

Signed-off-by: David Goulet <dgoulet@torproject.org>
This commit is contained in:
David Goulet 2022-03-02 13:20:07 -05:00
parent d14af825ce
commit 7ce3edaee0
3 changed files with 195 additions and 25 deletions

View File

@ -166,8 +166,6 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin)
RATELIM_INIT(WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL);
if (onionskin->handshake_type == ONION_HANDSHAKE_TYPE_NTOR) {
char *m;
/* Note this ntor onionskin drop as an overload */
rep_hist_note_overload(OVERLOAD_GENERAL);
if ((m = rate_limit_log(&last_warned, approx_time()))) {
log_warn(LD_GENERAL,
"Your computer is too slow to handle this many circuit "

View File

@ -291,25 +291,6 @@ overload_general_dns_assessment(void)
approx_time() + overload_dns_timeout_period_secs;
}
/** Called just before the consensus will be replaced. Update the consensus
* parameters in case they changed. */
void
rep_hist_consensus_has_changed(const networkstatus_t *ns)
{
overload_dns_timeout_fraction =
networkstatus_get_param(ns, "overload_dns_timeout_scale_percent",
OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT,
OVERLOAD_DNS_TIMEOUT_PERCENT_MIN,
OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) /
OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
overload_dns_timeout_period_secs =
networkstatus_get_param(ns, "overload_dns_timeout_period_secs",
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT,
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN,
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX);
}
/** Note a DNS error for the given given libevent DNS record type and error
* code. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA.
*
@ -1913,12 +1894,144 @@ STATIC int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
STATIC int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
/**@}*/
/** Counters keeping the same stats as above but for the entire duration of the
* process (not reset). */
static uint64_t stats_n_onionskin_assigned[MAX_ONION_STAT_TYPE+1] = {0};
static uint64_t stats_n_onionskin_dropped[MAX_ONION_STAT_TYPE+1] = {0};
/* We use a scale here so we can represent percentages with decimal points by
* scaling the value by this factor and so 0.5% becomes a value of 500.
* Default is 1% and thus min and max range is 0 to 100%. */
#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE 1000.0
#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT 1000
#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN 0
#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX 100000
/** Consensus parameter: indicate what fraction of ntor onionskin drop over the
* total number of requests must be reached before we trigger a general
* overload signal.*/
static double overload_onionskin_ntor_fraction =
OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT /
OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0;
/* Number of seconds for the assessment period. Default is 6 hours (21600) and
* the min max range is within a 32bit value. We align this period to the
* Heartbeat so the logs would match this period more or less. */
#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT (60 * 60 * 6)
#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN 0
#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX INT32_MAX
/** Consensus parameter: Period, in seconds, over which we count the number of
* ntor onionskins requests and how many were dropped. After that period, we
* assess if we trigger an overload or not. */
static int32_t overload_onionskin_ntor_period_secs =
OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT;
/** Structure containing information for an assessment period of the onionskin
* drop overload general signal.
*
* It is used to track, within a time period, how many requests we've gotten
* and how many were dropped. The overload general signal is decided from these
* depending on some consensus parameters. */
typedef struct {
/** Total number of ntor onionskin requested for an assessment period. */
uint64_t n_ntor_requested;
/** Total number of dropped ntor onionskins for an assessment period. */
uint64_t n_ntor_dropped;
/** When is the next assessment time of the general overload for ntor
* onionskin drop. Once this time is reached, all stats are reset and this
* time is set to the next assessment time. */
time_t next_assessment_time;
} overload_onionskin_assessment_t;
/** Keep track of the onionskin requests for an assessment period. */
static overload_onionskin_assessment_t overload_onionskin_assessment;
/**
* We combine ntorv3 and ntor into the same stat, so we must
* use this function to covert the cell type to a stat index.
*/
static inline uint16_t
onionskin_type_to_stat(uint16_t type)
{
if (BUG(type > MAX_ONION_STAT_TYPE)) {
return MAX_ONION_STAT_TYPE; // use ntor if out of range
}
return type;
}
/** Assess our ntor handshake statistics and decide if we need to emit a
* general overload signal.
*
* Regardless of overloaded or not, if the assessment time period has passed,
* the stats are reset back to 0 and the assessment time period updated.
*
* This is called when a ntor handshake is _requested_ because we want to avoid
* to have an assymetric situation where requested counter is reset to 0 but
* then a drop happens leading to the drop counter being incremented while the
* requested counter is 0. */
static void
overload_general_onionskin_assessment(void)
{
/* Initialize the time. Should be done once. */
if (overload_onionskin_assessment.next_assessment_time == 0) {
goto reset;
}
/* Not the time yet. */
if (overload_onionskin_assessment.next_assessment_time > approx_time()) {
goto done;
}
/* Make sure we have enough requests to be able to make a proper assessment.
* We want to avoid 1 single request/drop to trigger an overload as we want
* at least the number of requests to be above the scale of our fraction. */
if (overload_onionskin_assessment.n_ntor_requested <
OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE) {
goto done;
}
/* Lets see if we can signal a general overload. */
double fraction = (double) overload_onionskin_assessment.n_ntor_dropped /
(double) overload_onionskin_assessment.n_ntor_requested;
if (fraction >= overload_onionskin_ntor_fraction) {
log_notice(LD_HIST, "General overload -> Ntor dropped (%" PRIu64 ") "
"fraction %.4f%% is above threshold of %.4f%%",
overload_onionskin_assessment.n_ntor_dropped,
fraction * 100.0,
overload_onionskin_ntor_fraction * 100.0);
rep_hist_note_overload(OVERLOAD_GENERAL);
}
reset:
/* Reset counters for the next period. */
overload_onionskin_assessment.n_ntor_dropped = 0;
overload_onionskin_assessment.n_ntor_requested = 0;
overload_onionskin_assessment.next_assessment_time =
approx_time() + overload_onionskin_ntor_period_secs;
done:
return;
}
/** A new onionskin (using the <b>type</b> handshake) has arrived. */
void
rep_hist_note_circuit_handshake_requested(uint16_t type)
{
if (type <= MAX_ONION_HANDSHAKE_TYPE)
onion_handshakes_requested[type]++;
uint16_t stat = onionskin_type_to_stat(type);
onion_handshakes_requested[stat]++;
/* Only relays get to record requested onionskins. */
if (stat == ONION_HANDSHAKE_TYPE_NTOR) {
/* Assess if we've reached the overload general signal. */
overload_general_onionskin_assessment();
overload_onionskin_assessment.n_ntor_requested++;
}
}
/** We've sent an onionskin (using the <b>type</b> handshake) to a
@ -1926,8 +2039,24 @@ rep_hist_note_circuit_handshake_requested(uint16_t type)
void
rep_hist_note_circuit_handshake_assigned(uint16_t type)
{
if (type <= MAX_ONION_HANDSHAKE_TYPE)
onion_handshakes_assigned[type]++;
onion_handshakes_assigned[onionskin_type_to_stat(type)]++;
stats_n_onionskin_assigned[onionskin_type_to_stat(type)]++;
}
/** We've just drop an onionskin (using the <b>type</b> handshake) due to being
* overloaded. */
void
rep_hist_note_circuit_handshake_dropped(uint16_t type)
{
uint16_t stat = onionskin_type_to_stat(type);
stats_n_onionskin_dropped[stat]++;
/* Only relays get to record requested onionskins. */
if (stat == ONION_HANDSHAKE_TYPE_NTOR) {
/* Note the dropped ntor in the overload assessment object. */
overload_onionskin_assessment.n_ntor_dropped++;
}
}
/** Get the circuit handshake value that is requested. */
@ -2522,6 +2651,39 @@ rep_hist_free_all(void)
tor_assert_nonfatal_once(rephist_total_num == 0);
}
/** Called just before the consensus will be replaced. Update the consensus
* parameters in case they changed. */
void
rep_hist_consensus_has_changed(const networkstatus_t *ns)
{
overload_onionskin_ntor_fraction =
networkstatus_get_param(ns, "overload_onionskin_ntor_scale_percent",
OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT,
OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN,
OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX) /
OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0;
overload_onionskin_ntor_period_secs =
networkstatus_get_param(ns, "overload_onionskin_ntor_period_secs",
OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT,
OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN,
OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX);
/** XXX: Unused parameters. */
overload_dns_timeout_fraction =
networkstatus_get_param(ns, "overload_dns_timeout_scale_percent",
OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT,
OVERLOAD_DNS_TIMEOUT_PERCENT_MIN,
OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) /
OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
overload_dns_timeout_period_secs =
networkstatus_get_param(ns, "overload_dns_timeout_period_secs",
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT,
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN,
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX);
}
#ifdef TOR_UNIT_TESTS
/* only exists for unit tests: get HSv2 stats object */
const hs_v2_stats_t *

View File

@ -58,6 +58,7 @@ time_t rep_hist_desc_stats_write(time_t now);
void rep_hist_note_circuit_handshake_requested(uint16_t type);
void rep_hist_note_circuit_handshake_assigned(uint16_t type);
void rep_hist_note_circuit_handshake_dropped(uint16_t type);
void rep_hist_log_circuit_handshake_stats(time_t now);
MOCK_DECL(int, rep_hist_get_circuit_handshake_requested, (uint16_t type));
@ -79,8 +80,17 @@ void rep_hist_free_all(void);
void rep_hist_note_negotiated_link_proto(unsigned link_proto,
int started_here);
void rep_hist_log_link_protocol_counts(void);
uint64_t rep_hist_get_n_dns_error(int type, uint8_t error);
uint64_t rep_hist_get_n_dns_request(int type);
void rep_hist_note_dns_request(int type);
void rep_hist_note_dns_error(int type, uint8_t error);
void rep_hist_consensus_has_changed(const networkstatus_t *ns);
/** We have 3 stat types: tap, fast, and ntor. The max type is ntor (2) */
#define MAX_ONION_STAT_TYPE ONION_HANDSHAKE_TYPE_NTOR
extern uint64_t rephist_total_alloc;
extern uint32_t rephist_total_num;
#ifdef TOR_UNIT_TESTS