diff --git a/changes/ticket40708 b/changes/ticket40708 new file mode 100644 index 0000000000..1c4a044a0b --- /dev/null +++ b/changes/ticket40708 @@ -0,0 +1,3 @@ + o Minor feature (metrics): + - Add various congestion control counters to the MetricsPort. Closes ticket + 40708. diff --git a/src/core/or/circuitlist.c b/src/core/or/circuitlist.c index 4dbf4d4549..d1c734bdd2 100644 --- a/src/core/or/circuitlist.c +++ b/src/core/or/circuitlist.c @@ -102,6 +102,8 @@ #include "lib/compress/compress_zstd.h" #include "lib/buf/buffers.h" #include "core/or/congestion_control_common.h" +#include "core/or/congestion_control_st.h" +#include "lib/math/stats.h" #include "core/or/ocirc_event.h" @@ -147,6 +149,15 @@ static void circuit_about_to_free(circuit_t *circ); */ static int any_opened_circs_cached_val = 0; +/** Moving average of the cc->cwnd from each closed circuit. */ +double cc_stats_circ_close_cwnd_ma = 0; +/** Moving average of the cc->cwnd from each closed slow-start circuit. */ +double cc_stats_circ_close_ss_cwnd_ma = 0; + +/* Running count of the above moving averages. Needed so we can update it. */ +static double stats_circ_close_cwnd_ma_count = 0; +static double stats_circ_close_ss_cwnd_ma_count = 0; + /********* END VARIABLES ************/ /* Implement circuit handle helpers. */ @@ -2225,6 +2236,29 @@ circuit_mark_for_close_, (circuit_t *circ, int reason, int line, /* Notify the HS subsystem that this circuit is closing. */ hs_circ_cleanup_on_close(circ); + /* Update stats. */ + if (circ->ccontrol) { + if (circ->ccontrol->in_slow_start) { + /* If we are in slow start, only count the ss cwnd if we've sent + * enough data to get RTT measurements such that we have a min + * and a max RTT, and they are not the same. This prevents us from + * averaging and reporting unused and low-use circuits here */ + if (circ->ccontrol->max_rtt_usec != circ->ccontrol->min_rtt_usec) { + stats_circ_close_ss_cwnd_ma_count++; + cc_stats_circ_close_ss_cwnd_ma = + stats_update_running_avg(cc_stats_circ_close_ss_cwnd_ma, + circ->ccontrol->cwnd, + stats_circ_close_ss_cwnd_ma_count); + } + } else { + stats_circ_close_cwnd_ma_count++; + cc_stats_circ_close_cwnd_ma = + stats_update_running_avg(cc_stats_circ_close_cwnd_ma, + circ->ccontrol->cwnd, + stats_circ_close_cwnd_ma_count); + } + } + if (circuits_pending_close == NULL) circuits_pending_close = smartlist_new(); diff --git a/src/core/or/circuitlist.h b/src/core/or/circuitlist.h index 147e2cb2f8..281ea1f76f 100644 --- a/src/core/or/circuitlist.h +++ b/src/core/or/circuitlist.h @@ -161,6 +161,10 @@ ((p) == CIRCUIT_PURPOSE_C_GENERAL || \ (p) == CIRCUIT_PURPOSE_C_HSDIR_GET) +/** Stats. */ +extern double cc_stats_circ_close_cwnd_ma; +extern double cc_stats_circ_close_ss_cwnd_ma; + /** Convert a circuit_t* to a pointer to the enclosing or_circuit_t. Assert * if the cast is impossible. */ or_circuit_t *TO_OR_CIRCUIT(circuit_t *); diff --git a/src/core/or/congestion_control_common.c b/src/core/or/congestion_control_common.c index 09d1d501da..c27eb2fca8 100644 --- a/src/core/or/congestion_control_common.c +++ b/src/core/or/congestion_control_common.c @@ -94,6 +94,9 @@ void congestion_control_set_cc_enabled(void); /* Number of times the RTT value was reset. For MetricsPort. */ static uint64_t num_rtt_reset; +/* Number of times the clock was stalled. For MetricsPort. */ +static uint64_t num_clock_stalls; + /* Consensus parameters cached. The non static ones are extern. */ static uint32_t cwnd_max = CWND_MAX_DFLT; int32_t cell_queue_high = CELL_QUEUE_HIGH_DFLT; @@ -136,6 +139,13 @@ congestion_control_get_num_rtt_reset(void) return num_rtt_reset; } +/** Return the number of clock stalls that have been done. */ +uint64_t +congestion_control_get_num_clock_stalls(void) +{ + return num_clock_stalls; +} + /** * Update global congestion control related consensus parameter values, * every consensus update. @@ -872,6 +882,7 @@ congestion_control_update_circuit_rtt(congestion_control_t *cc, /* Do not update RTT at all if it looks fishy */ if (time_delta_stalled_or_jumped(cc, cc->ewma_rtt_usec, rtt)) { + num_clock_stalls++; /* Accounting */ return 0; } diff --git a/src/core/or/congestion_control_common.h b/src/core/or/congestion_control_common.h index e62775ecce..a2740fb0b6 100644 --- a/src/core/or/congestion_control_common.h +++ b/src/core/or/congestion_control_common.h @@ -83,6 +83,7 @@ bool congestion_control_validate_sendme_increment(uint8_t sendme_inc); char *congestion_control_get_control_port_fields(const origin_circuit_t *); uint64_t congestion_control_get_num_rtt_reset(void); +uint64_t congestion_control_get_num_clock_stalls(void); /* Ugh, C.. these are private. Use the getter instead, when * external to the congestion control code. */ diff --git a/src/core/or/congestion_control_flow.c b/src/core/or/congestion_control_flow.c index ea2c0df42c..729e67b9a3 100644 --- a/src/core/or/congestion_control_flow.c +++ b/src/core/or/congestion_control_flow.c @@ -23,6 +23,7 @@ #include "feature/nodelist/networkstatus.h" #include "trunnel/flow_control_cells.h" #include "feature/control/control_events.h" +#include "lib/math/stats.h" #include "core/or/connection_st.h" #include "core/or/cell_st.h" @@ -36,6 +37,14 @@ static uint32_t xon_change_pct; static uint32_t xon_ewma_cnt; static uint32_t xon_rate_bytes; +/** Metricsport stats */ +uint64_t cc_stats_flow_num_xoff_sent; +uint64_t cc_stats_flow_num_xon_sent; +double cc_stats_flow_xoff_outbuf_ma = 0; +static double cc_stats_flow_xoff_outbuf_ma_count = 0; +double cc_stats_flow_xon_outbuf_ma = 0; +static double cc_stats_flow_xon_outbuf_ma_count = 0; + /* In normal operation, we can get a burst of up to 32 cells before returning * to libevent to flush the outbuf. This is a heuristic from hardcoded values * and strange logic in connection_bucket_get_share(). */ @@ -148,6 +157,7 @@ circuit_send_stream_xoff(edge_connection_t *stream) if (connection_edge_send_command(stream, RELAY_COMMAND_XOFF, (char*)payload, (size_t)xoff_size) == 0) { stream->xoff_sent = true; + cc_stats_flow_num_xoff_sent++; /* If this is an entry conn, notify control port */ if (TO_CONN(stream)->type == CONN_TYPE_AP) { @@ -222,6 +232,8 @@ circuit_send_stream_xon(edge_connection_t *stream) /* Revert the xoff sent status, so we can send another one if need be */ stream->xoff_sent = false; + cc_stats_flow_num_xon_sent++; + /* If it's an entry conn, notify control port */ if (TO_CONN(stream)->type == CONN_TYPE_AP) { control_event_stream_status(TO_ENTRY_CONN(TO_CONN(stream)), @@ -473,6 +485,12 @@ flow_control_decide_xoff(edge_connection_t *stream) total_buffered, buffer_limit_xoff); tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xoff_sending), stream); + cc_stats_flow_xoff_outbuf_ma_count++; + cc_stats_flow_xoff_outbuf_ma = + stats_update_running_avg(cc_stats_flow_xoff_outbuf_ma, + total_buffered, + cc_stats_flow_xoff_outbuf_ma_count); + circuit_send_stream_xoff(stream); /* Clear the drain rate. It is considered wrong if we @@ -627,6 +645,13 @@ flow_control_decide_xon(edge_connection_t *stream, size_t n_written) stream->ewma_drain_rate, total_buffered); tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xon_rate_change), stream); + + cc_stats_flow_xon_outbuf_ma_count++; + cc_stats_flow_xon_outbuf_ma = + stats_update_running_avg(cc_stats_flow_xon_outbuf_ma, + total_buffered, + cc_stats_flow_xon_outbuf_ma_count); + circuit_send_stream_xon(stream); } } else if (total_buffered == 0) { diff --git a/src/core/or/congestion_control_flow.h b/src/core/or/congestion_control_flow.h index 6c318027ea..5c735cce23 100644 --- a/src/core/or/congestion_control_flow.h +++ b/src/core/or/congestion_control_flow.h @@ -33,6 +33,12 @@ bool conn_uses_flow_control(connection_t *stream); uint64_t edge_get_max_rtt(const edge_connection_t *); +/** Metricsport externs */ +extern uint64_t cc_stats_flow_num_xoff_sent; +extern uint64_t cc_stats_flow_num_xon_sent; +extern double cc_stats_flow_xoff_outbuf_ma; +extern double cc_stats_flow_xon_outbuf_ma; + /* Private section starts. */ #ifdef TOR_CONGESTION_CONTROL_FLOW_PRIVATE diff --git a/src/core/or/congestion_control_vegas.c b/src/core/or/congestion_control_vegas.c index f129ecadd6..87f59d12fd 100644 --- a/src/core/or/congestion_control_vegas.c +++ b/src/core/or/congestion_control_vegas.c @@ -23,6 +23,7 @@ #include "core/or/channel.h" #include "feature/nodelist/networkstatus.h" #include "feature/control/control_events.h" +#include "lib/math/stats.h" #define OUTBUF_CELLS (2*TLS_RECORD_MAX_CELLS) @@ -49,6 +50,16 @@ #define VEGAS_DELTA_ONION_DFLT (9*OUTBUF_CELLS) #define VEGAS_SSCAP_ONION_DFLT (600) +/** Moving average of the cc->cwnd from each circuit exiting slowstart. */ +double cc_stats_vegas_exit_ss_cwnd_ma = 0; +/* Running count of this moving average. Needed so we can update it. */ +static double stats_cwnd_exit_ss_ma_count = 0; + +/** Stats on how many times we reached "delta" param. */ +uint64_t cc_stats_vegas_above_delta = 0; +/** Stats on how many times we reached "ss_cwnd_max" param. */ +uint64_t cc_stats_vegas_above_ss_cwnd_max = 0; + /** * The original TCP Vegas congestion window BDP estimator. */ @@ -243,6 +254,12 @@ congestion_control_vegas_exit_slow_start(const circuit_t *circ, cc->next_cc_event = CWND_UPDATE_RATE(cc); congestion_control_vegas_log(circ, cc); + /* Update running cc->cwnd average for metrics. */ + stats_cwnd_exit_ss_ma_count++; + cc_stats_vegas_exit_ss_cwnd_ma = + stats_update_running_avg(cc_stats_vegas_exit_ss_cwnd_ma, + cc->cwnd, stats_cwnd_exit_ss_ma_count); + /* We need to report that slow start has exited ASAP, * for sbws bandwidth measurement. */ if (CIRCUIT_IS_ORIGIN(circ)) { @@ -322,11 +339,13 @@ congestion_control_vegas_process_sendme(congestion_control_t *cc, if (cc->cwnd >= cc->vegas_params.ss_cwnd_max) { cc->cwnd = cc->vegas_params.ss_cwnd_max; congestion_control_vegas_exit_slow_start(circ, cc); + cc_stats_vegas_above_ss_cwnd_max++; } /* After slow start, We only update once per window */ } else if (cc->next_cc_event == 0) { if (queue_use > cc->vegas_params.delta) { cc->cwnd = vegas_bdp(cc) + cc->vegas_params.delta - CWND_INC(cc); + cc_stats_vegas_above_delta++; } else if (queue_use > cc->vegas_params.beta || cc->blocked_chan) { cc->cwnd -= CWND_INC(cc); } else if (queue_use < cc->vegas_params.alpha) { diff --git a/src/core/or/congestion_control_vegas.h b/src/core/or/congestion_control_vegas.h index 95fcea5722..5aced07a8f 100644 --- a/src/core/or/congestion_control_vegas.h +++ b/src/core/or/congestion_control_vegas.h @@ -12,6 +12,10 @@ #include "core/or/crypt_path_st.h" #include "core/or/circuit_st.h" +extern double cc_stats_vegas_exit_ss_cwnd_ma; +extern uint64_t cc_stats_vegas_above_delta; +extern uint64_t cc_stats_vegas_above_ss_cwnd_max; + /* Processing SENDME cell. */ int congestion_control_vegas_process_sendme(struct congestion_control_t *cc, const circuit_t *circ, diff --git a/src/core/or/relay.c b/src/core/or/relay.c index 5489332309..69b85b5d80 100644 --- a/src/core/or/relay.c +++ b/src/core/or/relay.c @@ -128,6 +128,7 @@ uint64_t stats_n_relay_cells_delivered = 0; /** Stats: how many circuits have we closed due to the cell queue limit being * reached (see append_cell_to_circuit_queue()) */ uint64_t stats_n_circ_max_cell_reached = 0; +uint64_t stats_n_circ_max_cell_outq_reached = 0; /** * Update channel usage state based on the type of relay cell and @@ -3265,6 +3266,7 @@ append_cell_to_circuit_queue(circuit_t *circ, channel_t *chan, /* This DoS defense only applies at the Guard as in the p_chan is likely * a client IP attacking the network. */ if (exitward && CIRCUIT_IS_ORCIRC(circ)) { + stats_n_circ_max_cell_outq_reached++; dos_note_circ_max_outq(CONST_TO_OR_CIRCUIT(circ)->p_chan); } diff --git a/src/core/or/relay.h b/src/core/or/relay.h index 24466bccd0..3a1f3b0ae5 100644 --- a/src/core/or/relay.h +++ b/src/core/or/relay.h @@ -15,6 +15,7 @@ extern uint64_t stats_n_relay_cells_relayed; extern uint64_t stats_n_relay_cells_delivered; extern uint64_t stats_n_circ_max_cell_reached; +extern uint64_t stats_n_circ_max_cell_outq_reached; const char *relay_command_to_string(uint8_t command); diff --git a/src/feature/relay/relay_metrics.c b/src/feature/relay/relay_metrics.c index cc36736831..076ac68618 100644 --- a/src/feature/relay/relay_metrics.c +++ b/src/feature/relay/relay_metrics.c @@ -14,16 +14,19 @@ #include "core/mainloop/connection.h" #include "core/mainloop/mainloop.h" #include "core/or/congestion_control_common.h" +#include "core/or/congestion_control_vegas.h" +#include "core/or/congestion_control_flow.h" #include "core/or/circuitlist.h" #include "core/or/dos.h" #include "core/or/relay.h" #include "app/config/config.h" -#include "lib/malloc/malloc.h" #include "lib/container/smartlist.h" -#include "lib/metrics/metrics_store.h" #include "lib/log/util_bug.h" +#include "lib/malloc/malloc.h" +#include "lib/math/fp.h" +#include "lib/metrics/metrics_store.h" #include "feature/hs/hs_dos.h" #include "feature/nodelist/nodelist.h" @@ -314,6 +317,12 @@ fill_dos_values(void) metrics_format_label("type", "circuit_killed_max_cell")); metrics_store_entry_update(sentry, stats_n_circ_max_cell_reached); + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("type", "circuit_killed_max_cell_outq")); + metrics_store_entry_update(sentry, stats_n_circ_max_cell_outq_reached); + sentry = metrics_store_add(the_store, rentry->type, rentry->name, rentry->help); metrics_store_entry_add_label(sentry, @@ -364,6 +373,94 @@ fill_cc_values(void) metrics_store_entry_add_label(sentry, metrics_format_label("action", "rtt_reset")); metrics_store_entry_update(sentry, congestion_control_get_num_rtt_reset()); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "clock_stalls")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "rtt_skipped")); + metrics_store_entry_update(sentry, + congestion_control_get_num_clock_stalls()); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "slow_start_exit")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "cwnd")); + metrics_store_entry_update(sentry, + tor_llround(cc_stats_vegas_exit_ss_cwnd_ma)); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "on_circ_close")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "cwnd")); + metrics_store_entry_update(sentry, + tor_llround(cc_stats_circ_close_cwnd_ma)); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "on_circ_close")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "ss_cwnd")); + metrics_store_entry_update(sentry, + tor_llround(cc_stats_circ_close_ss_cwnd_ma)); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "xoff")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "outbuf")); + metrics_store_entry_update(sentry, + tor_llround(cc_stats_flow_xoff_outbuf_ma)); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "xoff")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "num_sent")); + metrics_store_entry_update(sentry, + cc_stats_flow_num_xoff_sent); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "xon")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "outbuf")); + metrics_store_entry_update(sentry, + tor_llround(cc_stats_flow_xon_outbuf_ma)); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "xon")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "num_sent")); + metrics_store_entry_update(sentry, + cc_stats_flow_num_xon_sent); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "process_sendme")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "above_delta")); + metrics_store_entry_update(sentry, cc_stats_vegas_above_delta); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "process_sendme")); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "above_ss_cwnd_max")); + metrics_store_entry_update(sentry, cc_stats_vegas_above_ss_cwnd_max); } /** Helper: Fill in single stream metrics output. */ diff --git a/src/lib/math/include.am b/src/lib/math/include.am index b2ca280f47..f68b265da7 100644 --- a/src/lib/math/include.am +++ b/src/lib/math/include.am @@ -20,4 +20,5 @@ src_lib_libtor_math_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) noinst_HEADERS += \ src/lib/math/fp.h \ src/lib/math/laplace.h \ - src/lib/math/prob_distr.h + src/lib/math/prob_distr.h \ + src/lib/math/stats.h diff --git a/src/lib/math/stats.h b/src/lib/math/stats.h new file mode 100644 index 0000000000..328d61a9d6 --- /dev/null +++ b/src/lib/math/stats.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2022, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file stats.h + * + * \brief Header for stats.c + **/ + +#ifndef TOR_STATS_H +#define TOR_STATS_H + +/** Update an average making it a "running average". The "avg" is the current + * value that will be updated to the new one. The "value" is the new value to + * add to the average and "n" is the new count as in including the "value". */ +static inline double +stats_update_running_avg(double avg, double value, double n) +{ + return ((avg * (n - 1)) + value) / n; +} + +#endif /* !defined(TOR_STATS_H) */