From b0496d40197dd5b4fb7b694c1410082d4e34dda6 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 14 Jun 2022 20:59:03 +0000 Subject: [PATCH 1/2] Fix for RTT calculation hang during congestion control. Only cache RTT on explicit stalls; Only use this cache for the RTT decrease case. Otherwise use only local circuit RTT state for clock jump checks. --- src/core/or/congestion_control_common.c | 51 ++++++++++--------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/src/core/or/congestion_control_common.c b/src/core/or/congestion_control_common.c index 54de48b39d..71cd666ee2 100644 --- a/src/core/or/congestion_control_common.c +++ b/src/core/or/congestion_control_common.c @@ -695,10 +695,9 @@ congestion_control_update_circuit_estimates(congestion_control_t *cc, static bool time_delta_should_use_heuristics(const congestion_control_t *cc) { - - /* If we have exited slow start, we should have processed at least - * a cwnd worth of RTTs */ - if (!cc->in_slow_start) { + /* If we have exited slow start and also have an EWMA RTT, we + * should have processed at least a cwnd worth of RTTs */ + if (!cc->in_slow_start && cc->ewma_rtt_usec) { return true; } @@ -735,34 +734,27 @@ time_delta_stalled_or_jumped(const congestion_control_t *cc, log_fn_ratelim(&stall_info_limit, LOG_INFO, LD_CIRC, "Congestion control cannot measure RTT due to monotime stall."); - /* If delta is every 0, the monotime clock has stalled, and we should - * not use it anywhere. */ is_monotime_clock_broken = true; - - return is_monotime_clock_broken; - } - - /* If the old_delta is 0, we have no previous values on this circuit. - * - * So, return the global monotime status from other circuits, and - * do not update. - */ - if (old_delta == 0) { - return is_monotime_clock_broken; + return true; } /* * For the heuristic cases, we need at least a few timestamps, * to average out any previous partial stalls or jumps. So until - * than point, let's just use the cached status from other circuits. + * that point, let's just assume its OK. */ if (!time_delta_should_use_heuristics(cc)) { - return is_monotime_clock_broken; + return false; } /* If old_delta is significantly larger than new_delta, then - * this means that the monotime clock recently stopped moving - * forward. */ + * this means that the monotime clock could have recently + * stopped moving forward. However, use the cache for this + * value, because it may also be caused by network activity, + * or by a previous clock jump that was not detected. + * + * So if we have not gotten a 0-delta recently, we will + * still allow this new low RTT, but just yell about it. */ if (old_delta > new_delta * DELTA_DISCREPENCY_RATIO_MAX) { static ratelim_t dec_notice_limit = RATELIM_INIT(300); log_fn_ratelim(&dec_notice_limit, LOG_NOTICE, LD_CIRC, @@ -770,29 +762,28 @@ time_delta_stalled_or_jumped(const congestion_control_t *cc, "), likely due to clock jump.", new_delta/1000, old_delta/1000); - is_monotime_clock_broken = true; - return is_monotime_clock_broken; } /* If new_delta is significantly larger than old_delta, then - * this means that the monotime clock suddenly jumped forward. */ + * this means that the monotime clock suddenly jumped forward. + * However, do not cache this value, because it may also be caused + * by network activity. + */ if (new_delta > old_delta * DELTA_DISCREPENCY_RATIO_MAX) { static ratelim_t dec_notice_limit = RATELIM_INIT(300); - log_fn_ratelim(&dec_notice_limit, LOG_NOTICE, LD_CIRC, + log_fn_ratelim(&dec_notice_limit, LOG_PROTOCOL_WARN, LD_CIRC, "Sudden increase in circuit RTT (%"PRIu64" vs %"PRIu64 - "), likely due to clock jump.", + "), likely due to clock jump or suspended remote endpoint.", new_delta/1000, old_delta/1000); - is_monotime_clock_broken = true; - - return is_monotime_clock_broken; + return true; } /* All good! Update cached status, too */ is_monotime_clock_broken = false; - return is_monotime_clock_broken; + return false; } /** From 5a25374209689466e10906a77e66ad717a615a02 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Wed, 15 Jun 2022 21:00:44 +0000 Subject: [PATCH 2/2] Add changes file for bug40626 --- changes/bug40626 | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changes/bug40626 diff --git a/changes/bug40626 b/changes/bug40626 new file mode 100644 index 0000000000..cda8abe4d7 --- /dev/null +++ b/changes/bug40626 @@ -0,0 +1,6 @@ + o Major bugfixes (congestion control, TROVE-2022-001): + - Fix a scenario where RTT estimation can become wedged, seriously + degrading congestion control performance on all circuits. This impacts + clients, onion services, and relays, and can be triggered remotely by a + malicious endpoint. Tracked as CVE-2022-33903. Fixes bug 40626; bugfix + on 0.4.7.5-alpha.