Merge branch 'tor-gitlab/mr/650' into maint-0.4.7

This commit is contained in:
David Goulet 2022-11-08 12:36:19 -05:00
commit fde87096c3
14 changed files with 233 additions and 3 deletions

3
changes/ticket40708 Normal file
View File

@ -0,0 +1,3 @@
o Minor feature (metrics):
- Add various congestion control counters to the MetricsPort. Closes ticket
40708.

View File

@ -102,6 +102,8 @@
#include "lib/compress/compress_zstd.h"
#include "lib/buf/buffers.h"
#include "core/or/congestion_control_common.h"
#include "core/or/congestion_control_st.h"
#include "lib/math/stats.h"
#include "core/or/ocirc_event.h"
@ -147,6 +149,15 @@ static void circuit_about_to_free(circuit_t *circ);
*/
static int any_opened_circs_cached_val = 0;
/** Moving average of the cc->cwnd from each closed circuit. */
double cc_stats_circ_close_cwnd_ma = 0;
/** Moving average of the cc->cwnd from each closed slow-start circuit. */
double cc_stats_circ_close_ss_cwnd_ma = 0;
/* Running sample counts for the above moving averages. Needed so we can
 * update them. */
static double stats_circ_close_cwnd_ma_count = 0;
static double stats_circ_close_ss_cwnd_ma_count = 0;
/********* END VARIABLES ************/
/* Implement circuit handle helpers. */
@ -2225,6 +2236,29 @@ circuit_mark_for_close_, (circuit_t *circ, int reason, int line,
/* Notify the HS subsystem that this circuit is closing. */
hs_circ_cleanup_on_close(circ);
/* Update stats. */
if (circ->ccontrol) {
if (circ->ccontrol->in_slow_start) {
/* If we are in slow start, only count the ss cwnd if we've sent
* enough data to get RTT measurements such that we have a min
* and a max RTT, and they are not the same. This prevents us from
* averaging and reporting unused and low-use circuits here */
if (circ->ccontrol->max_rtt_usec != circ->ccontrol->min_rtt_usec) {
stats_circ_close_ss_cwnd_ma_count++;
cc_stats_circ_close_ss_cwnd_ma =
stats_update_running_avg(cc_stats_circ_close_ss_cwnd_ma,
circ->ccontrol->cwnd,
stats_circ_close_ss_cwnd_ma_count);
}
} else {
stats_circ_close_cwnd_ma_count++;
cc_stats_circ_close_cwnd_ma =
stats_update_running_avg(cc_stats_circ_close_cwnd_ma,
circ->ccontrol->cwnd,
stats_circ_close_cwnd_ma_count);
}
}
if (circuits_pending_close == NULL)
circuits_pending_close = smartlist_new();

View File

@ -161,6 +161,10 @@
((p) == CIRCUIT_PURPOSE_C_GENERAL || \
(p) == CIRCUIT_PURPOSE_C_HSDIR_GET)
/** Stats. */
extern double cc_stats_circ_close_cwnd_ma;
extern double cc_stats_circ_close_ss_cwnd_ma;
/** Convert a circuit_t* to a pointer to the enclosing or_circuit_t. Assert
* if the cast is impossible. */
or_circuit_t *TO_OR_CIRCUIT(circuit_t *);

View File

@ -94,6 +94,9 @@ void congestion_control_set_cc_enabled(void);
/* Number of times the RTT value was reset. For MetricsPort. */
static uint64_t num_rtt_reset;
/* Number of times the clock was stalled. For MetricsPort. */
static uint64_t num_clock_stalls;
/* Consensus parameters cached. The non static ones are extern. */
static uint32_t cwnd_max = CWND_MAX_DFLT;
int32_t cell_queue_high = CELL_QUEUE_HIGH_DFLT;
@ -136,6 +139,13 @@ congestion_control_get_num_rtt_reset(void)
return num_rtt_reset;
}
/** MetricsPort accessor: report how many RTT updates have been skipped so
 * far because the time delta looked stalled or jumped. */
uint64_t
congestion_control_get_num_clock_stalls(void)
{
  /* Snapshot the file-local counter; callers never see the variable itself. */
  const uint64_t stalls_so_far = num_clock_stalls;

  return stalls_so_far;
}
/**
* Update global congestion control related consensus parameter values,
* every consensus update.
@ -872,6 +882,7 @@ congestion_control_update_circuit_rtt(congestion_control_t *cc,
/* Do not update RTT at all if it looks fishy */
if (time_delta_stalled_or_jumped(cc, cc->ewma_rtt_usec, rtt)) {
num_clock_stalls++; /* Accounting */
return 0;
}

View File

@ -83,6 +83,7 @@ bool congestion_control_validate_sendme_increment(uint8_t sendme_inc);
char *congestion_control_get_control_port_fields(const origin_circuit_t *);
uint64_t congestion_control_get_num_rtt_reset(void);
uint64_t congestion_control_get_num_clock_stalls(void);
/* Ugh, C.. these are private. Use the getter instead, when
* external to the congestion control code. */

View File

@ -23,6 +23,7 @@
#include "feature/nodelist/networkstatus.h"
#include "trunnel/flow_control_cells.h"
#include "feature/control/control_events.h"
#include "lib/math/stats.h"
#include "core/or/connection_st.h"
#include "core/or/cell_st.h"
@ -36,6 +37,14 @@ static uint32_t xon_change_pct;
static uint32_t xon_ewma_cnt;
static uint32_t xon_rate_bytes;
/** Metricsport stats */
uint64_t cc_stats_flow_num_xoff_sent;
uint64_t cc_stats_flow_num_xon_sent;
double cc_stats_flow_xoff_outbuf_ma = 0;
static double cc_stats_flow_xoff_outbuf_ma_count = 0;
double cc_stats_flow_xon_outbuf_ma = 0;
static double cc_stats_flow_xon_outbuf_ma_count = 0;
/* In normal operation, we can get a burst of up to 32 cells before returning
* to libevent to flush the outbuf. This is a heuristic from hardcoded values
* and strange logic in connection_bucket_get_share(). */
@ -148,6 +157,7 @@ circuit_send_stream_xoff(edge_connection_t *stream)
if (connection_edge_send_command(stream, RELAY_COMMAND_XOFF,
(char*)payload, (size_t)xoff_size) == 0) {
stream->xoff_sent = true;
cc_stats_flow_num_xoff_sent++;
/* If this is an entry conn, notify control port */
if (TO_CONN(stream)->type == CONN_TYPE_AP) {
@ -222,6 +232,8 @@ circuit_send_stream_xon(edge_connection_t *stream)
/* Revert the xoff sent status, so we can send another one if need be */
stream->xoff_sent = false;
cc_stats_flow_num_xon_sent++;
/* If it's an entry conn, notify control port */
if (TO_CONN(stream)->type == CONN_TYPE_AP) {
control_event_stream_status(TO_ENTRY_CONN(TO_CONN(stream)),
@ -473,6 +485,12 @@ flow_control_decide_xoff(edge_connection_t *stream)
total_buffered, buffer_limit_xoff);
tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xoff_sending), stream);
cc_stats_flow_xoff_outbuf_ma_count++;
cc_stats_flow_xoff_outbuf_ma =
stats_update_running_avg(cc_stats_flow_xoff_outbuf_ma,
total_buffered,
cc_stats_flow_xoff_outbuf_ma_count);
circuit_send_stream_xoff(stream);
/* Clear the drain rate. It is considered wrong if we
@ -627,6 +645,13 @@ flow_control_decide_xon(edge_connection_t *stream, size_t n_written)
stream->ewma_drain_rate,
total_buffered);
tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xon_rate_change), stream);
cc_stats_flow_xon_outbuf_ma_count++;
cc_stats_flow_xon_outbuf_ma =
stats_update_running_avg(cc_stats_flow_xon_outbuf_ma,
total_buffered,
cc_stats_flow_xon_outbuf_ma_count);
circuit_send_stream_xon(stream);
}
} else if (total_buffered == 0) {

View File

@ -33,6 +33,12 @@ bool conn_uses_flow_control(connection_t *stream);
uint64_t edge_get_max_rtt(const edge_connection_t *);
/** Metricsport externs */
extern uint64_t cc_stats_flow_num_xoff_sent;
extern uint64_t cc_stats_flow_num_xon_sent;
extern double cc_stats_flow_xoff_outbuf_ma;
extern double cc_stats_flow_xon_outbuf_ma;
/* Private section starts. */
#ifdef TOR_CONGESTION_CONTROL_FLOW_PRIVATE

View File

@ -23,6 +23,7 @@
#include "core/or/channel.h"
#include "feature/nodelist/networkstatus.h"
#include "feature/control/control_events.h"
#include "lib/math/stats.h"
#define OUTBUF_CELLS (2*TLS_RECORD_MAX_CELLS)
@ -49,6 +50,16 @@
#define VEGAS_DELTA_ONION_DFLT (9*OUTBUF_CELLS)
#define VEGAS_SSCAP_ONION_DFLT (600)
/** Moving average of the cc->cwnd from each circuit exiting slowstart. */
double cc_stats_vegas_exit_ss_cwnd_ma = 0;
/* Running count of this moving average. Needed so we can update it. */
static double stats_cwnd_exit_ss_ma_count = 0;
/** Stats on how many times we reached "delta" param. */
uint64_t cc_stats_vegas_above_delta = 0;
/** Stats on how many times we reached "ss_cwnd_max" param. */
uint64_t cc_stats_vegas_above_ss_cwnd_max = 0;
/**
* The original TCP Vegas congestion window BDP estimator.
*/
@ -243,6 +254,12 @@ congestion_control_vegas_exit_slow_start(const circuit_t *circ,
cc->next_cc_event = CWND_UPDATE_RATE(cc);
congestion_control_vegas_log(circ, cc);
/* Update running cc->cwnd average for metrics. */
stats_cwnd_exit_ss_ma_count++;
cc_stats_vegas_exit_ss_cwnd_ma =
stats_update_running_avg(cc_stats_vegas_exit_ss_cwnd_ma,
cc->cwnd, stats_cwnd_exit_ss_ma_count);
/* We need to report that slow start has exited ASAP,
* for sbws bandwidth measurement. */
if (CIRCUIT_IS_ORIGIN(circ)) {
@ -322,11 +339,13 @@ congestion_control_vegas_process_sendme(congestion_control_t *cc,
if (cc->cwnd >= cc->vegas_params.ss_cwnd_max) {
cc->cwnd = cc->vegas_params.ss_cwnd_max;
congestion_control_vegas_exit_slow_start(circ, cc);
cc_stats_vegas_above_ss_cwnd_max++;
}
/* After slow start, we only update once per window */
} else if (cc->next_cc_event == 0) {
if (queue_use > cc->vegas_params.delta) {
cc->cwnd = vegas_bdp(cc) + cc->vegas_params.delta - CWND_INC(cc);
cc_stats_vegas_above_delta++;
} else if (queue_use > cc->vegas_params.beta || cc->blocked_chan) {
cc->cwnd -= CWND_INC(cc);
} else if (queue_use < cc->vegas_params.alpha) {

View File

@ -12,6 +12,10 @@
#include "core/or/crypt_path_st.h"
#include "core/or/circuit_st.h"
extern double cc_stats_vegas_exit_ss_cwnd_ma;
extern uint64_t cc_stats_vegas_above_delta;
extern uint64_t cc_stats_vegas_above_ss_cwnd_max;
/* Processing SENDME cell. */
int congestion_control_vegas_process_sendme(struct congestion_control_t *cc,
const circuit_t *circ,

View File

@ -128,6 +128,7 @@ uint64_t stats_n_relay_cells_delivered = 0;
/** Stats: how many circuits have we closed due to the cell queue limit being
* reached (see append_cell_to_circuit_queue()) */
uint64_t stats_n_circ_max_cell_reached = 0;
uint64_t stats_n_circ_max_cell_outq_reached = 0;
/**
* Update channel usage state based on the type of relay cell and
@ -3252,6 +3253,7 @@ append_cell_to_circuit_queue(circuit_t *circ, channel_t *chan,
/* This DoS defense only applies at the Guard as in the p_chan is likely
* a client IP attacking the network. */
if (exitward && CIRCUIT_IS_ORCIRC(circ)) {
stats_n_circ_max_cell_outq_reached++;
dos_note_circ_max_outq(CONST_TO_OR_CIRCUIT(circ)->p_chan);
}

View File

@ -15,6 +15,7 @@
extern uint64_t stats_n_relay_cells_relayed;
extern uint64_t stats_n_relay_cells_delivered;
extern uint64_t stats_n_circ_max_cell_reached;
extern uint64_t stats_n_circ_max_cell_outq_reached;
const char *relay_command_to_string(uint8_t command);

View File

@ -14,16 +14,19 @@
#include "core/mainloop/connection.h"
#include "core/mainloop/mainloop.h"
#include "core/or/congestion_control_common.h"
#include "core/or/congestion_control_vegas.h"
#include "core/or/congestion_control_flow.h"
#include "core/or/circuitlist.h"
#include "core/or/dos.h"
#include "core/or/relay.h"
#include "app/config/config.h"
#include "lib/malloc/malloc.h"
#include "lib/container/smartlist.h"
#include "lib/metrics/metrics_store.h"
#include "lib/log/util_bug.h"
#include "lib/malloc/malloc.h"
#include "lib/math/fp.h"
#include "lib/metrics/metrics_store.h"
#include "feature/hs/hs_dos.h"
#include "feature/nodelist/nodelist.h"
@ -314,6 +317,12 @@ fill_dos_values(void)
metrics_format_label("type", "circuit_killed_max_cell"));
metrics_store_entry_update(sentry, stats_n_circ_max_cell_reached);
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("type", "circuit_killed_max_cell_outq"));
metrics_store_entry_update(sentry, stats_n_circ_max_cell_outq_reached);
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
@ -364,6 +373,94 @@ fill_cc_values(void)
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "rtt_reset"));
metrics_store_entry_update(sentry, congestion_control_get_num_rtt_reset());
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "clock_stalls"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "rtt_skipped"));
metrics_store_entry_update(sentry,
congestion_control_get_num_clock_stalls());
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "slow_start_exit"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "cwnd"));
metrics_store_entry_update(sentry,
tor_llround(cc_stats_vegas_exit_ss_cwnd_ma));
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "on_circ_close"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "cwnd"));
metrics_store_entry_update(sentry,
tor_llround(cc_stats_circ_close_cwnd_ma));
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "on_circ_close"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "ss_cwnd"));
metrics_store_entry_update(sentry,
tor_llround(cc_stats_circ_close_ss_cwnd_ma));
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "xoff"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "outbuf"));
metrics_store_entry_update(sentry,
tor_llround(cc_stats_flow_xoff_outbuf_ma));
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "xoff"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "num_sent"));
metrics_store_entry_update(sentry,
cc_stats_flow_num_xoff_sent);
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "xon"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "outbuf"));
metrics_store_entry_update(sentry,
tor_llround(cc_stats_flow_xon_outbuf_ma));
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "xon"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "num_sent"));
metrics_store_entry_update(sentry,
cc_stats_flow_num_xon_sent);
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "process_sendme"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "above_delta"));
metrics_store_entry_update(sentry, cc_stats_vegas_above_delta);
sentry = metrics_store_add(the_store, rentry->type, rentry->name,
rentry->help);
metrics_store_entry_add_label(sentry,
metrics_format_label("state", "process_sendme"));
metrics_store_entry_add_label(sentry,
metrics_format_label("action", "above_ss_cwnd_max"));
metrics_store_entry_update(sentry, cc_stats_vegas_above_ss_cwnd_max);
}
/** Helper: Fill in single stream metrics output. */

View File

@ -20,4 +20,5 @@ src_lib_libtor_math_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
noinst_HEADERS += \
src/lib/math/fp.h \
src/lib/math/laplace.h \
src/lib/math/prob_distr.h
src/lib/math/prob_distr.h \
src/lib/math/stats.h

22
src/lib/math/stats.h Normal file
View File

@ -0,0 +1,22 @@
/* Copyright (c) 2022, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
* \file stats.h
*
* \brief Inline helpers for maintaining running statistics.
**/
#ifndef TOR_STATS_H
#define TOR_STATS_H
/** Update an average making it a "running average".
 *
 * "avg" is the current average, "value" is the new sample to fold into it,
 * and "n" is the new sample count, i.e. the count that already includes
 * "value". Callers are expected to increment their counter before calling.
 *
 * Returns the updated average. If "n" is not strictly greater than 1 (the
 * first sample, or an invalid zero, negative or NaN count), "value" itself
 * is returned: there is no previous average to weigh in, and this avoids a
 * division by zero or a NaN leaking into the reported metrics. */
static inline double
stats_update_running_avg(double avg, double value, double n)
{
  /* Written as a negated comparison so that a NaN count also takes this
   * branch (every ordered comparison with NaN is false). */
  if (!(n > 1.0)) {
    return value;
  }

  /* Re-expand the previous average into a sum over (n - 1) samples, add the
   * new sample, and divide by the new count. */
  return ((avg * (n - 1)) + value) / n;
}
#endif /* !defined(TOR_STATS_H) */