mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-24 04:13:28 +01:00
Merge remote-tracking branch 'tor-gitlab/mr/500' into main
This commit is contained in:
commit
193781e6ef
11
changes/ticket40527
Normal file
11
changes/ticket40527
Normal file
@ -0,0 +1,11 @@
|
||||
o Major bugfixes (relay, overload):
|
||||
- Don't make Tor DNS timeout trigger an overload general state. These
|
||||
timeouts are different from DNS server timeout. They have to be seen as
|
||||
timeout related to UX and not because of a network problem. Fixes bug
|
||||
40527; bugfix on 0.4.6.1-alpha.
|
||||
- Change the MetricsPort DNS "timeout" label to be "tor_timeout" in order
|
||||
to indicate that this was a DNS timeout from tor perspective and not the
|
||||
DNS server itself.
|
||||
- Deprecate overload_dns_timeout_period_secs and
|
||||
overload_dns_timeout_scale_percent consensus parameters as well. They
|
||||
were used to assess the overload state which is no more now.
|
@ -1666,7 +1666,6 @@ notify_before_networkstatus_changes(const networkstatus_t *old_c,
|
||||
dos_consensus_has_changed(new_c);
|
||||
relay_consensus_has_changed(new_c);
|
||||
hs_dos_consensus_has_changed(new_c);
|
||||
rep_hist_consensus_has_changed(new_c);
|
||||
}
|
||||
|
||||
/* Called after a new consensus has been put in the global state. It is safe
|
||||
|
@ -161,7 +161,7 @@ fill_dns_error_values(void)
|
||||
{ .name = "refused", .key = DNS_ERR_REFUSED },
|
||||
{ .name = "truncated", .key = DNS_ERR_TRUNCATED },
|
||||
{ .name = "unknown", .key = DNS_ERR_UNKNOWN },
|
||||
{ .name = "timeout", .key = DNS_ERR_TIMEOUT },
|
||||
{ .name = "tor_timeout", .key = DNS_ERR_TIMEOUT },
|
||||
{ .name = "shutdown", .key = DNS_ERR_SHUTDOWN },
|
||||
{ .name = "cancel", .key = DNS_ERR_CANCEL },
|
||||
{ .name = "nodata", .key = DNS_ERR_NODATA },
|
||||
|
@ -219,33 +219,6 @@ static uint64_t stats_n_tcp_exhaustion = 0;
|
||||
|
||||
/***** DNS statistics *****/
|
||||
|
||||
/* We use a scale here so we can represent percentages with decimal points by
|
||||
* scaling the value by this factor and so 0.5% becomes a value of 500.
|
||||
* Default is 1% and thus min and max range is 0 to 100%. */
|
||||
#define OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE 1000.0
|
||||
#define OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT 1000
|
||||
#define OVERLOAD_DNS_TIMEOUT_PERCENT_MIN 0
|
||||
#define OVERLOAD_DNS_TIMEOUT_PERCENT_MAX 100000
|
||||
|
||||
/** Consensus parameter: indicate what fraction of DNS timeout errors over the
|
||||
* total number of DNS requests must be reached before we trigger a general
|
||||
* overload signal .*/
|
||||
static double overload_dns_timeout_fraction =
|
||||
OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT /
|
||||
OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
|
||||
|
||||
/* Number of seconds for the assessment period. Default is 10 minutes (600) and
|
||||
* the min max range is within a 32bit value. */
|
||||
#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT (10 * 60)
|
||||
#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN 0
|
||||
#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX INT32_MAX
|
||||
|
||||
/** Consensus parameter: Period, in seconds, over which we count the number of
|
||||
* DNS requests and timeout errors. After that period, we assess if we trigger
|
||||
* an overload or not. */
|
||||
static int32_t overload_dns_timeout_period_secs =
|
||||
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT;
|
||||
|
||||
/** Overload DNS statistics. The information in this object is used to assess
|
||||
* if, due to DNS errors, we should emit a general overload signal or not.
|
||||
*
|
||||
@ -260,9 +233,6 @@ typedef struct {
|
||||
* right before the DNS request is initiated. */
|
||||
uint64_t stats_n_request;
|
||||
|
||||
/** Total number of DNS timeout errors. */
|
||||
uint64_t stats_n_error_timeout;
|
||||
|
||||
/** When is the next assessment time of the general overload for DNS errors.
|
||||
* Once this time is reached, all stats are reset and this time is set to the
|
||||
* next assessment time. */
|
||||
@ -285,7 +255,7 @@ typedef struct {
|
||||
/* Total number of DNS errors specific to libevent. */
|
||||
uint64_t stats_n_error_truncated; /* 65 */
|
||||
uint64_t stats_n_error_unknown; /* 66 */
|
||||
uint64_t stats_n_error_timeout; /* 67 */
|
||||
uint64_t stats_n_error_tor_timeout; /* 67 */
|
||||
uint64_t stats_n_error_shutdown; /* 68 */
|
||||
uint64_t stats_n_error_cancel; /* 69 */
|
||||
uint64_t stats_n_error_nodata; /* 70 */
|
||||
@ -337,60 +307,6 @@ get_dns_stats_by_type(const int type)
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Assess the DNS timeout errors and if we have enough to trigger a general
|
||||
* overload. */
|
||||
static void
|
||||
overload_general_dns_assessment(void)
|
||||
{
|
||||
/* Initialize the time. Should be done once. */
|
||||
if (overload_dns_stats.next_assessment_time == 0) {
|
||||
goto reset;
|
||||
}
|
||||
|
||||
/* Not the time yet. */
|
||||
if (overload_dns_stats.next_assessment_time > approx_time()) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Lets see if we can signal a general overload. */
|
||||
double fraction = (double) overload_dns_stats.stats_n_error_timeout /
|
||||
(double) overload_dns_stats.stats_n_request;
|
||||
if (fraction >= overload_dns_timeout_fraction) {
|
||||
log_notice(LD_HIST, "General overload -> DNS timeouts (%" PRIu64 ") "
|
||||
"fraction %.4f%% is above threshold of %.4f%%",
|
||||
overload_dns_stats.stats_n_error_timeout,
|
||||
fraction * 100.0,
|
||||
overload_dns_timeout_fraction * 100.0);
|
||||
rep_hist_note_overload(OVERLOAD_GENERAL);
|
||||
}
|
||||
|
||||
reset:
|
||||
/* Reset counters for the next period. */
|
||||
overload_dns_stats.stats_n_error_timeout = 0;
|
||||
overload_dns_stats.stats_n_request = 0;
|
||||
overload_dns_stats.next_assessment_time =
|
||||
approx_time() + overload_dns_timeout_period_secs;
|
||||
}
|
||||
|
||||
/** Called just before the consensus will be replaced. Update the consensus
|
||||
* parameters in case they changed. */
|
||||
void
|
||||
rep_hist_consensus_has_changed(const networkstatus_t *ns)
|
||||
{
|
||||
overload_dns_timeout_fraction =
|
||||
networkstatus_get_param(ns, "overload_dns_timeout_scale_percent",
|
||||
OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT,
|
||||
OVERLOAD_DNS_TIMEOUT_PERCENT_MIN,
|
||||
OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) /
|
||||
OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
|
||||
|
||||
overload_dns_timeout_period_secs =
|
||||
networkstatus_get_param(ns, "overload_dns_timeout_period_secs",
|
||||
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT,
|
||||
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN,
|
||||
OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX);
|
||||
}
|
||||
|
||||
/** Return the DNS error count for the given libevent DNS type and error code.
|
||||
* The possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. */
|
||||
uint64_t
|
||||
@ -419,7 +335,7 @@ rep_hist_get_n_dns_error(int type, uint8_t error)
|
||||
case DNS_ERR_UNKNOWN:
|
||||
return dns_stats->stats_n_error_unknown;
|
||||
case DNS_ERR_TIMEOUT:
|
||||
return dns_stats->stats_n_error_timeout;
|
||||
return dns_stats->stats_n_error_tor_timeout;
|
||||
case DNS_ERR_SHUTDOWN:
|
||||
return dns_stats->stats_n_error_shutdown;
|
||||
case DNS_ERR_CANCEL:
|
||||
@ -454,16 +370,6 @@ rep_hist_get_n_dns_request(int type)
|
||||
void
|
||||
rep_hist_note_dns_error(int type, uint8_t error)
|
||||
{
|
||||
/* Assess if we need to trigger a general overload with regards to the DNS
|
||||
* errors or not. */
|
||||
overload_general_dns_assessment();
|
||||
|
||||
/* Because of the libevent problem (see note in the function comment), we
|
||||
* disregard the DNS query type and keep track of DNS timeout for the
|
||||
* overload state. */
|
||||
if (error == DNS_ERR_TIMEOUT) {
|
||||
overload_dns_stats.stats_n_error_timeout++;
|
||||
}
|
||||
overload_dns_stats.stats_n_request++;
|
||||
|
||||
/* Again, the libevent bug (see function comment), for an error that is
|
||||
@ -502,7 +408,7 @@ rep_hist_note_dns_error(int type, uint8_t error)
|
||||
dns_stats->stats_n_error_unknown++;
|
||||
break;
|
||||
case DNS_ERR_TIMEOUT:
|
||||
dns_stats->stats_n_error_timeout++;
|
||||
dns_stats->stats_n_error_tor_timeout++;
|
||||
break;
|
||||
case DNS_ERR_SHUTDOWN:
|
||||
dns_stats->stats_n_error_shutdown++;
|
||||
|
@ -89,8 +89,6 @@ uint64_t rep_hist_get_n_dns_request(int type);
|
||||
void rep_hist_note_dns_request(int type);
|
||||
void rep_hist_note_dns_error(int type, uint8_t error);
|
||||
|
||||
void rep_hist_consensus_has_changed(const networkstatus_t *ns);
|
||||
|
||||
extern uint64_t rephist_total_alloc;
|
||||
extern uint32_t rephist_total_num;
|
||||
#ifdef TOR_UNIT_TESTS
|
||||
|
@ -721,7 +721,7 @@ test_overload_stats(void *arg)
|
||||
stats_str = rep_hist_get_overload_stats_lines();
|
||||
tt_assert(!stats_str);
|
||||
|
||||
/* Note a DNS overload */
|
||||
/* Note an overload */
|
||||
rep_hist_note_overload(OVERLOAD_GENERAL);
|
||||
|
||||
/* Move the time forward one hour */
|
||||
@ -742,7 +742,7 @@ test_overload_stats(void *arg)
|
||||
|
||||
/* Now the time should be 2002-01-07 00:00:00 */
|
||||
|
||||
/* Note a DNS overload */
|
||||
/* Note an overload */
|
||||
rep_hist_note_overload(OVERLOAD_GENERAL);
|
||||
|
||||
stats_str = rep_hist_get_overload_general_line();
|
||||
@ -760,7 +760,7 @@ test_overload_stats(void *arg)
|
||||
tt_str_op("overload-fd-exhausted 1 2002-01-07 00:00:00\n", OP_EQ, stats_str);
|
||||
tor_free(stats_str);
|
||||
|
||||
/* Move the time forward. Register DNS overload. See that the time changed */
|
||||
/* Move the time forward. Register overload. See that the time changed */
|
||||
current_time += 3600*2;
|
||||
update_approx_time(current_time);
|
||||
|
||||
@ -867,81 +867,6 @@ test_overload_stats(void *arg)
|
||||
tor_free(stats_str);
|
||||
}
|
||||
|
||||
/** Test the overload stats logic. */
|
||||
static void
|
||||
test_overload_dns_timeout(void *arg)
|
||||
{
|
||||
char *stats_str = NULL;
|
||||
(void) arg;
|
||||
|
||||
/* Lets simulate a series of timeouts but below our default 1% threshold. */
|
||||
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
/* This should trigger 9 timeouts which is just below 1% (10) */
|
||||
if (i > 0 && !(i % 100)) {
|
||||
rep_hist_note_dns_error(0, DNS_ERR_TIMEOUT);
|
||||
} else {
|
||||
rep_hist_note_dns_error(0, DNS_ERR_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
/* No overload yet. */
|
||||
stats_str = rep_hist_get_overload_general_line();
|
||||
tt_assert(!stats_str);
|
||||
|
||||
/* Move it 10 minutes in the future and see if we get a general overload. */
|
||||
update_approx_time(approx_time() + (10 * 60));
|
||||
|
||||
/* This query should NOT trigger the general overload because we are below
|
||||
* our default of 1%. */
|
||||
rep_hist_note_dns_error(0, DNS_ERR_NONE);
|
||||
stats_str = rep_hist_get_overload_general_line();
|
||||
tt_assert(!stats_str);
|
||||
|
||||
/* We'll now go above our 1% threshold. */
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
/* This should trigger 10 timeouts which is our threshold of 1% (10) */
|
||||
if (!(i % 10)) {
|
||||
rep_hist_note_dns_error(0, DNS_ERR_TIMEOUT);
|
||||
} else {
|
||||
rep_hist_note_dns_error(0, DNS_ERR_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
/* Move it 10 minutes in the future and see if we get a general overload. */
|
||||
update_approx_time(approx_time() + (10 * 60));
|
||||
|
||||
/* This query should trigger the general overload because we are above 1%. */
|
||||
rep_hist_note_dns_error(0, DNS_ERR_NONE);
|
||||
stats_str = rep_hist_get_overload_general_line();
|
||||
tt_assert(stats_str);
|
||||
tor_free(stats_str);
|
||||
|
||||
/* Move 72h in the future, we should NOT get an overload anymore. */
|
||||
update_approx_time(approx_time() + (72 * 3600));
|
||||
|
||||
stats_str = rep_hist_get_overload_general_line();
|
||||
tt_assert(!stats_str);
|
||||
|
||||
/* This query should NOT trigger the general overload. */
|
||||
rep_hist_note_dns_error(0, DNS_ERR_TIMEOUT);
|
||||
stats_str = rep_hist_get_overload_general_line();
|
||||
tt_assert(!stats_str);
|
||||
|
||||
/* Move it 10 minutes in the future and see if we get a general overload. We
|
||||
* have now 100% of requests timing out. */
|
||||
update_approx_time(approx_time() + (10 * 60));
|
||||
|
||||
/* This query should trigger the general overload with 50% of timeouts. */
|
||||
rep_hist_note_dns_error(0, DNS_ERR_NONE);
|
||||
stats_str = rep_hist_get_overload_general_line();
|
||||
tt_assert(stats_str);
|
||||
tor_free(stats_str);
|
||||
|
||||
done:
|
||||
tor_free(stats_str);
|
||||
}
|
||||
|
||||
#define ENT(name) \
|
||||
{ #name, test_ ## name , 0, NULL, NULL }
|
||||
#define FORK(name) \
|
||||
@ -958,7 +883,6 @@ struct testcase_t stats_tests[] = {
|
||||
FORK(rephist_v3_onions),
|
||||
FORK(load_stats_file),
|
||||
FORK(overload_stats),
|
||||
FORK(overload_dns_timeout),
|
||||
|
||||
END_OF_TESTCASES
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user