From 0a5ecb334298187a64f58382231245111130aa76 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 9 Mar 2021 15:36:40 +0200 Subject: [PATCH 1/5] Implement backbone of overload statistics. - Implement overload statistics structure. - Implement function that keeps track of overload statistics. - Implement function that writes overload statistics to descriptor. - Unittest for the whole logic. --- changes/bug40222 | 6 ++ src/app/config/config.c | 1 + src/app/config/or_options_st.h | 3 + src/feature/relay/router.c | 6 ++ src/feature/stats/rephist.c | 116 +++++++++++++++++++++++++++++++++ src/feature/stats/rephist.h | 18 +++++ src/test/test_stats.c | 109 +++++++++++++++++++++++++++++++ 7 files changed, 259 insertions(+) create mode 100644 changes/bug40222 diff --git a/changes/bug40222 b/changes/bug40222 new file mode 100644 index 0000000000..8338ea4a7b --- /dev/null +++ b/changes/bug40222 @@ -0,0 +1,6 @@ + o Major features (metrics): + - Introduce a new class of extra-info statistics that allows relays to + expose details of how overloaded they are. This information is controlled + using the OverloadStatistics torrc option, and it will be used to make + more informed decisions about the network's load balancing. Implements + proposal 328; closes ticket 40222. diff --git a/src/app/config/config.c b/src/app/config/config.c index 1ac460bac0..156e90a549 100644 --- a/src/app/config/config.c +++ b/src/app/config/config.c @@ -352,6 +352,7 @@ static const config_var_t option_vars_[] = { V(CacheDirectoryGroupReadable, AUTOBOOL, "auto"), V(CellStatistics, BOOL, "0"), V(PaddingStatistics, BOOL, "1"), + V(OverloadStatistics, BOOL, "1"), V(LearnCircuitBuildTimeout, BOOL, "1"), V(CircuitBuildTimeout, INTERVAL, "0"), OBSOLETE("CircuitIdleTimeout"), diff --git a/src/app/config/or_options_st.h b/src/app/config/or_options_st.h index 90302eae7b..689adbc71b 100644 --- a/src/app/config/or_options_st.h +++ b/src/app/config/or_options_st.h @@ -674,6 +674,9 @@ struct or_options_t { /** If true, include statistics file contents in extra-info documents. */ int ExtraInfoStatistics; + /** If true, include overload statistics in extra-info documents. */ + int OverloadStatistics; + /** If true, do not believe anybody who tells us that a domain resolves * to an internal address, or that an internal address has a PTR mapping. * Helps avoid some cross-site attacks. */ diff --git a/src/feature/relay/router.c b/src/feature/relay/router.c index 73e8393761..88160bd1cb 100644 --- a/src/feature/relay/router.c +++ b/src/feature/relay/router.c @@ -3372,6 +3372,12 @@ extrainfo_dump_to_string_stats_helper(smartlist_t *chunks, if (contents) smartlist_add(chunks, contents); } + if (options->OverloadStatistics) { + contents = rep_hist_get_overload_stats_lines(); + if (contents) { + smartlist_add(chunks, contents); + } + } /* bridge statistics */ if (should_record_bridge_info(options)) { const char *bridge_stats = geoip_get_bridge_stats_extrainfo(now); diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c index f7c9336bff..64bec000df 100644 --- a/src/feature/stats/rephist.c +++ b/src/feature/stats/rephist.c @@ -183,6 +183,122 @@ static time_t started_tracking_stability = 0; /** Map from hex OR identity digest to or_history_t. */ static digestmap_t *history_map = NULL; +/** Represents a state of overload stats. + * + * All the timestamps in this structure have already been rounded down to the + * nearest hour. */ +typedef struct { + /* When did we last experience a general overload? */ + time_t overload_general_time; + + /* When did we last experience a bandwidth-related overload? */ + time_t overload_ratelimits_time; + /* How many times have we gone off the our read limits? */ + uint64_t overload_read_count; + /* How many times have we gone off the our write limits? */ + uint64_t overload_write_count; + + /* When did we last experience a file descriptor exhaustion? */ + time_t overload_fd_exhausted_time; + /* How many times have we experienced a file descriptor exhaustion? */ + uint64_t overload_fd_exhausted; +} overload_stats_t; + +/** Current state of overload stats */ +static overload_stats_t overload_stats; + +/** Return true if this overload happened within the last `n_hours`. */ +static bool +overload_happened_recently(time_t overload_time, unsigned n_hours) +{ + /* An overload is relevant if it happened in the last 72 hours */ + if (overload_time > approx_time() - 3600 * n_hours) { + return true; + } + return false; +} + +/* The current version of the overload stats version */ +#define OVERLOAD_STATS_VERSION 1 + +/** Returns an allocated string for extra-info documents for publishing + * overload statistics. */ +char * +rep_hist_get_overload_stats_lines(void) +{ + char *result = NULL; + smartlist_t *chunks = smartlist_new(); + char tbuf[ISO_TIME_LEN+1]; + + /* First encode the general overload */ + if (overload_happened_recently(overload_stats.overload_general_time, 72)) { + format_iso_time(tbuf, overload_stats.overload_general_time); + smartlist_add_asprintf(chunks, "overload-general %d %s", + OVERLOAD_STATS_VERSION, tbuf); + } + + /* Now do bandwidth-related overloads */ + if (overload_happened_recently(overload_stats.overload_ratelimits_time,24)) { + const or_options_t *options = get_options(); + format_iso_time(tbuf, overload_stats.overload_ratelimits_time); + smartlist_add_asprintf(chunks, + "overload-ratelimits %d %s %" PRIu64 " %" PRIu64 + " %" PRIu64 " %" PRIu64, + OVERLOAD_STATS_VERSION, tbuf, + options->BandwidthRate, options->BandwidthBurst, + overload_stats.overload_read_count, + overload_stats.overload_write_count); + } + + /* Finally file descriptor overloads */ + if (overload_happened_recently( + overload_stats.overload_fd_exhausted_time, 72)) { + format_iso_time(tbuf, overload_stats.overload_fd_exhausted_time); + smartlist_add_asprintf(chunks, "overload-fd-exhausted %d %s", + OVERLOAD_STATS_VERSION, tbuf); + } + + /* Bail early if we had nothing to write */ + if (smartlist_len(chunks) == 0) { + goto done; + } + + result = smartlist_join_strings(chunks, "\n", 0, NULL); + + done: + SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp)); + smartlist_free(chunks); + return result; +} + +/** Round down the time in `a` to the beginning of the current hour */ +#define SET_TO_START_OF_HOUR(a) STMT_BEGIN \ + (a) = approx_time() - (approx_time() % 3600); \ +STMT_END + +/** Note down an overload event of type `overload`. */ +void +rep_hist_note_overload(overload_type_t overload) +{ + switch (overload) { + case OVERLOAD_GENERAL: + SET_TO_START_OF_HOUR(overload_stats.overload_general_time); + break; + case OVERLOAD_READ: + SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time); + overload_stats.overload_read_count++; + break; + case OVERLOAD_WRITE: + SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time); + overload_stats.overload_write_count++; + break; + case OVERLOAD_FD_EXHAUSTED: + SET_TO_START_OF_HOUR(overload_stats.overload_fd_exhausted_time); + overload_stats.overload_fd_exhausted++; + break; + } +} + /** Return the or_history_t for the OR with identity digest id, * creating it if necessary. */ static or_history_t * diff --git a/src/feature/stats/rephist.h b/src/feature/stats/rephist.h index 7ecbfebfd7..45020d7967 100644 --- a/src/feature/stats/rephist.h +++ b/src/feature/stats/rephist.h @@ -140,6 +140,24 @@ void rep_hist_reset_padding_counts(void); void rep_hist_prep_published_padding_counts(time_t now); void rep_hist_padding_count_timers(uint64_t num_timers); +/** + * Represents the various types of overload we keep track of and expose in our + * extra-info descriptor. +*/ +typedef enum { + /* A general overload -- can have many different causes. */ + OVERLOAD_GENERAL, + /* We went over our configured read rate/burst bandwidth limit */ + OVERLOAD_READ, + /* We went over our configured write rate/burst bandwidth limit */ + OVERLOAD_WRITE, + /* We exhausted the file descriptors in this system */ + OVERLOAD_FD_EXHAUSTED, +} overload_type_t; + +void rep_hist_note_overload(overload_type_t overload); +char *rep_hist_get_overload_stats_lines(void); + #ifdef TOR_UNIT_TESTS struct hs_v2_stats_t; const struct hs_v2_stats_t *rep_hist_get_hs_v2_stats(void); diff --git a/src/test/test_stats.c b/src/test/test_stats.c index 617a36faba..1f14255e31 100644 --- a/src/test/test_stats.c +++ b/src/test/test_stats.c @@ -703,6 +703,114 @@ test_load_stats_file(void *arg) tor_free(content); } +/** Test the overload stats logic. */ +static void +test_overload_stats(void *arg) +{ + time_t current_time = 1010101010; + char *stats_str = NULL; + (void) arg; + + /* Change time to 03-01-2002 23:36 UTC */ + /* This should make the extrainfo timestamp be "2002-01-03 23:00:00" */ + update_approx_time(current_time); + + /* With an empty rephist we shouldn't get anything back */ + stats_str = rep_hist_get_overload_stats_lines(); + tt_assert(!stats_str); + + /* Note a DNS overload */ + rep_hist_note_overload(OVERLOAD_GENERAL); + + /* Move the time forward one hour */ + current_time += 3600; + update_approx_time(current_time); + + /* Now check the string */ + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-general 1 2002-01-03 23:00:00", OP_EQ, stats_str); + tor_free(stats_str); + + /* Move the time forward 72 hours: see that the line has disappeared. */ + current_time += 3600*72; + update_approx_time(current_time); + + stats_str = rep_hist_get_overload_stats_lines(); + tt_assert(!stats_str); + + /* Now the time should be 2002-01-07 00:00:00 */ + + /* Note a DNS overload */ + rep_hist_note_overload(OVERLOAD_GENERAL); + + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-general 1 2002-01-07 00:00:00", OP_EQ, stats_str); + tor_free(stats_str); + + /* Also note an fd exhaustion event */ + rep_hist_note_overload(OVERLOAD_FD_EXHAUSTED); + + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-general 1 2002-01-07 00:00:00\n" + "overload-fd-exhausted 1 2002-01-07 00:00:00", OP_EQ, stats_str); + tor_free(stats_str); + + /* Move the time forward. Register DNS overload. See that the time changed */ + current_time += 3600*2; + update_approx_time(current_time); + + rep_hist_note_overload(OVERLOAD_GENERAL); + + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-general 1 2002-01-07 02:00:00\n" + "overload-fd-exhausted 1 2002-01-07 00:00:00", OP_EQ, stats_str); + tor_free(stats_str); + + /* Move the time forward. Register a bandwidth ratelimit event. See that the + string is added */ + current_time += 3600*2; + update_approx_time(current_time); + + /* Register the rate limit event */ + rep_hist_note_overload(OVERLOAD_READ); + /* Also set some rate limiting values that should be reflected on the log */ + get_options_mutable()->BandwidthRate = 1000; + get_options_mutable()->BandwidthBurst = 2000; + + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-general 1 2002-01-07 02:00:00\n" + "overload-ratelimits 1 2002-01-07 04:00:00 1000 2000 1 0\n" + "overload-fd-exhausted 1 2002-01-07 00:00:00", OP_EQ, stats_str); + tor_free(stats_str); + + /* Move the time forward 24 hours: no rate limit line anymore. */ + current_time += 3600*24; + update_approx_time(current_time); + + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-general 1 2002-01-07 02:00:00\n" + "overload-fd-exhausted 1 2002-01-07 00:00:00", OP_EQ, stats_str); + tor_free(stats_str); + + /* Move the time forward 44 hours: no fd exhausted line anymore. */ + current_time += 3600*44; + update_approx_time(current_time); + + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-general 1 2002-01-07 02:00:00", OP_EQ, stats_str); + tor_free(stats_str); + + /* Move the time forward 2 hours: there is nothing left. */ + current_time += 3600*2; + update_approx_time(current_time); + + stats_str = rep_hist_get_overload_stats_lines(); + tt_assert(!stats_str); + + done: + tor_free(stats_str); +} + #define ENT(name) \ { #name, test_ ## name , 0, NULL, NULL } #define FORK(name) \ @@ -718,6 +826,7 @@ struct testcase_t stats_tests[] = { FORK(get_bandwidth_lines), FORK(rephist_v3_onions), FORK(load_stats_file), + FORK(overload_stats), END_OF_TESTCASES }; From f493a12e897b02f2e347078dc3e2a2437c324b66 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 9 Mar 2021 15:39:03 +0200 Subject: [PATCH 2/5] Implement straightforward overload general metrics. - OOM metric - onionskin overload metric - DNS timeout metric --- src/core/or/relay.c | 4 ++++ src/feature/relay/dns.c | 11 +++++++++++ src/feature/relay/onion_queue.c | 23 ++++++++++++++--------- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/core/or/relay.c b/src/core/or/relay.c index 32d6ca731a..d0de81dd7e 100644 --- a/src/core/or/relay.c +++ b/src/core/or/relay.c @@ -83,6 +83,7 @@ #include "feature/nodelist/routerlist.h" #include "core/or/scheduler.h" #include "feature/hs/hs_metrics.h" +#include "feature/stats/rephist.h" #include "core/or/cell_st.h" #include "core/or/cell_queue_st.h" @@ -2720,6 +2721,9 @@ cell_queues_check_size(void) if (alloc >= get_options()->MaxMemInQueues_low_threshold) { last_time_under_memory_pressure = approx_time(); if (alloc >= get_options()->MaxMemInQueues) { + /* Note this overload down */ + rep_hist_note_overload(OVERLOAD_GENERAL); + /* If we're spending over 20% of the memory limit on hidden service * descriptors, free them until we're down to 10%. Do the same for geoip * client cache. */ diff --git a/src/feature/relay/dns.c b/src/feature/relay/dns.c index 3d9e50524f..03da9cd3ed 100644 --- a/src/feature/relay/dns.c +++ b/src/feature/relay/dns.c @@ -63,6 +63,7 @@ #include "feature/relay/dns.h" #include "feature/relay/router.h" #include "feature/relay/routermode.h" +#include "feature/stats/rephist.h" #include "lib/crypt_ops/crypto_rand.h" #include "lib/evloop/compat_libevent.h" #include "lib/sandbox/sandbox.h" @@ -1547,6 +1548,16 @@ evdns_callback(int result, char type, int count, int ttl, void *addresses, tor_addr_make_unspec(&addr); + /* Note down any DNS errors to the statistics module */ + if (result == DNS_ERR_TIMEOUT) { + /* libevent timed out while resolving a name. However, because libevent + * handles retries and timeouts internally, this means that all attempts of + * libevent timed out. If we wanted to get more granular information about + * individual libevent attempts, we would have to implement our own DNS + * timeout/retry logic */ + rep_hist_note_overload(OVERLOAD_GENERAL); + } + /* Keep track of whether IPv6 is working */ if (type == DNS_IPv6_AAAA) { if (result == DNS_ERR_TIMEOUT) { diff --git a/src/feature/relay/onion_queue.c b/src/feature/relay/onion_queue.c index 3cbaa65d28..eaf7608fac 100644 --- a/src/feature/relay/onion_queue.c +++ b/src/feature/relay/onion_queue.c @@ -33,6 +33,7 @@ #include "core/or/circuitlist.h" #include "core/or/onion.h" #include "feature/nodelist/networkstatus.h" +#include "feature/stats/rephist.h" #include "core/or/or_circuit_st.h" @@ -163,15 +164,19 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin) #define WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL (60) static ratelim_t last_warned = RATELIM_INIT(WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL); - char *m; - if (onionskin->handshake_type == ONION_HANDSHAKE_TYPE_NTOR && - (m = rate_limit_log(&last_warned, approx_time()))) { - log_warn(LD_GENERAL, - "Your computer is too slow to handle this many circuit " - "creation requests! Please consider using the " - "MaxAdvertisedBandwidth config option or choosing a more " - "restricted exit policy.%s",m); - tor_free(m); + if (onionskin->handshake_type == ONION_HANDSHAKE_TYPE_NTOR) { + char *m; + /* Note this ntor onionskin drop as an overload */ + rep_hist_note_overload(OVERLOAD_GENERAL); + if ((m = rate_limit_log(&last_warned, approx_time()))) { + log_warn(LD_GENERAL, + "Your computer is too slow to handle this many circuit " + "creation requests! Please consider using the " + "MaxAdvertisedBandwidth config option or choosing a more " + "restricted exit policy.%s", + m); + tor_free(m); + } } tor_free(tmp); return -1; From faf1fe833eb4a59aae4843b450b85b4d8894ba02 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 9 Mar 2021 15:40:00 +0200 Subject: [PATCH 3/5] Implement port/file-descriptor exhaustion metric. This uses a heuristic that came up after a discussion with David. I'm leaving this as a separate commit so that it can be discussed further if needed. --- src/core/mainloop/connection.c | 38 +++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/core/mainloop/connection.c b/src/core/mainloop/connection.c index 9bf9f32eaa..9e3a343530 100644 --- a/src/core/mainloop/connection.c +++ b/src/core/mainloop/connection.c @@ -1239,12 +1239,34 @@ create_unix_sockaddr(const char *listenaddress, char **readable_address, } #endif /* defined(HAVE_SYS_UN_H) || defined(RUNNING_DOXYGEN) */ -/** Warn that an accept or a connect has failed because we're running out of - * TCP sockets we can use on current system. Rate-limit these warnings so - * that we don't spam the log. */ +/** + * A socket failed from resource exhaustion. + * + * AMong other actions, warn that an accept or a connect has failed because + * we're running out of TCP sockets we can use on current system. Rate-limit + * these warnings so that we don't spam the log. */ static void -warn_too_many_conns(void) +socket_failed_from_resource_exhaustion(void) { + /* When we get to this point we know that a socket could not be + * established. However the kernel does not let us know whether the reason is + * because we ran out of TCP source ports, or because we exhausted all the + * FDs on this system, or for any other reason. + * + * For this reason, we are going to use the following heuristic: If our + * system supports a lot of sockets, we will assume that it's a problem of + * TCP port exhaustion. Otherwise, if our system does not support many + * sockets, we will assume that this is because of file descriptor + * exhaustion. + */ + if (get_max_sockets() > 65535) { + /* TCP port exhaustion */ + rep_hist_note_overload(OVERLOAD_GENERAL); + } else { + /* File descriptor exhaustion */ + rep_hist_note_overload(OVERLOAD_FD_EXHAUSTED); + } + #define WARN_TOO_MANY_CONNS_INTERVAL (6*60*60) static ratelim_t last_warned = RATELIM_INIT(WARN_TOO_MANY_CONNS_INTERVAL); char *m; @@ -1473,7 +1495,7 @@ connection_listener_new(const struct sockaddr *listensockaddr, if (!SOCKET_OK(s)) { int e = tor_socket_errno(s); if (ERRNO_IS_RESOURCE_LIMIT(e)) { - warn_too_many_conns(); + socket_failed_from_resource_exhaustion(); /* * We'll call the OOS handler at the error exit, so set the * exhaustion flag for it. @@ -1599,7 +1621,7 @@ connection_listener_new(const struct sockaddr *listensockaddr, if (! SOCKET_OK(s)) { int e = tor_socket_errno(s); if (ERRNO_IS_RESOURCE_LIMIT(e)) { - warn_too_many_conns(); + socket_failed_from_resource_exhaustion(); /* * We'll call the OOS handler at the error exit, so set the * exhaustion flag for it. @@ -1912,7 +1934,7 @@ connection_handle_listener_read(connection_t *conn, int new_type) connection_check_oos(get_n_open_sockets(), 0); return 0; } else if (ERRNO_IS_RESOURCE_LIMIT(e)) { - warn_too_many_conns(); + socket_failed_from_resource_exhaustion(); /* Exhaustion; tell the OOS handler */ connection_check_oos(get_n_open_sockets(), 1); return 0; @@ -2175,7 +2197,7 @@ connection_connect_sockaddr,(connection_t *conn, */ *socket_error = tor_socket_errno(s); if (ERRNO_IS_RESOURCE_LIMIT(*socket_error)) { - warn_too_many_conns(); + socket_failed_from_resource_exhaustion(); connection_check_oos(get_n_open_sockets(), 1); } else { log_warn(LD_NET,"Error creating network socket: %s", From e688580277cd0223a32bf08603013b59693c713c Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 9 Mar 2021 15:41:11 +0200 Subject: [PATCH 4/5] Implement rate limiting metrics. --- src/core/mainloop/connection.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/core/mainloop/connection.c b/src/core/mainloop/connection.c index 9e3a343530..5ed0ba9aa6 100644 --- a/src/core/mainloop/connection.c +++ b/src/core/mainloop/connection.c @@ -3439,6 +3439,16 @@ connection_bucket_read_limit(connection_t *conn, time_t now) int priority = conn->type != CONN_TYPE_DIR; ssize_t conn_bucket = -1; size_t global_bucket_val = token_bucket_rw_get_read(&global_bucket); + if (global_bucket_val == 0) { + /* We reached our global read limit: count this as an overload. + * + * The token bucket is always initialized (see connection_bucket_init() and + * options_validate_relay_bandwidth()) and hence we can assume that if the + * token ever hits zero, it's a limit that got popped and not the bucket + * being uninitialized. + */ + rep_hist_note_overload(OVERLOAD_READ); + } if (connection_speaks_cells(conn)) { or_connection_t *or_conn = TO_OR_CONN(conn); @@ -3469,6 +3479,11 @@ connection_bucket_write_limit(connection_t *conn, time_t now) int priority = conn->type != CONN_TYPE_DIR; size_t conn_bucket = buf_datalen(conn->outbuf); size_t global_bucket_val = token_bucket_rw_get_write(&global_bucket); + if (global_bucket_val == 0) { + /* We reached our global write limit: We should count this as an overload. + * See above function for more information */ + rep_hist_note_overload(OVERLOAD_WRITE); + } if (!connection_is_rate_limited(conn)) { /* be willing to write to local conns even if our buckets are empty */ From 7740a8b5d4de649e3ba2a0578f789140725974b6 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Wed, 10 Mar 2021 14:37:36 +0200 Subject: [PATCH 5/5] Rate-limit counter should increase once per minute. --- src/feature/stats/rephist.c | 19 ++++++++++++++---- src/test/test_stats.c | 39 +++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c index 64bec000df..2c3f362e45 100644 --- a/src/feature/stats/rephist.c +++ b/src/feature/stats/rephist.c @@ -280,18 +280,29 @@ STMT_END void rep_hist_note_overload(overload_type_t overload) { + static time_t last_read_counted = 0; + static time_t last_write_counted = 0; + switch (overload) { case OVERLOAD_GENERAL: SET_TO_START_OF_HOUR(overload_stats.overload_general_time); break; - case OVERLOAD_READ: + case OVERLOAD_READ: { SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time); - overload_stats.overload_read_count++; + if (approx_time() >= last_read_counted + 60) { /* Count once a minute */ + overload_stats.overload_read_count++; + last_read_counted = approx_time(); + } break; - case OVERLOAD_WRITE: + } + case OVERLOAD_WRITE: { SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time); - overload_stats.overload_write_count++; + if (approx_time() >= last_write_counted + 60) { /* Count once a minute */ + overload_stats.overload_write_count++; + last_write_counted = approx_time(); + } break; + } case OVERLOAD_FD_EXHAUSTED: SET_TO_START_OF_HOUR(overload_stats.overload_fd_exhausted_time); overload_stats.overload_fd_exhausted++; diff --git a/src/test/test_stats.c b/src/test/test_stats.c index 1f14255e31..a22a42f723 100644 --- a/src/test/test_stats.c +++ b/src/test/test_stats.c @@ -807,6 +807,45 @@ test_overload_stats(void *arg) stats_str = rep_hist_get_overload_stats_lines(); tt_assert(!stats_str); + /* Now test the rate-limit rate-limiter ;) */ + for (int i = 0; i < 10; i++) { + rep_hist_note_overload(OVERLOAD_READ); + } + /* We already have an event registered from the previous tests. We just + * registered ten more overload events, but only one should have been counted + * because of the rate limiter */ + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-ratelimits 1 2002-01-10 02:00:00 1000 2000 2 0", + OP_EQ, stats_str); + tor_free(stats_str); + + /* Increment time by 59 secs and try again. No additional events should + register */ + current_time += 59; + update_approx_time(current_time); + + for (int i = 0; i < 10; i++) { + rep_hist_note_overload(OVERLOAD_READ); + } + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-ratelimits 1 2002-01-10 02:00:00 1000 2000 2 0", + OP_EQ, stats_str); + tor_free(stats_str); + + /* Now increment time by 2 secs -- taking it after the minute rate limiting + and see that events will register again */ + current_time += 2; + update_approx_time(current_time); + + for (int i = 0; i < 10; i++) { + rep_hist_note_overload(OVERLOAD_READ); + rep_hist_note_overload(OVERLOAD_WRITE); + } + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-ratelimits 1 2002-01-10 02:00:00 1000 2000 3 1", + OP_EQ, stats_str); + tor_free(stats_str); + done: tor_free(stats_str); }