mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-11 05:33:47 +01:00
Prop#324: Common RTT, BDP, and blocked channel signal support
This commit is contained in:
parent
4f68fe3e6c
commit
f1d0c2d826
933
src/core/or/congestion_control_common.c
Normal file
933
src/core/or/congestion_control_common.c
Normal file
@ -0,0 +1,933 @@
|
||||
/* Copyright (c) 2021, The Tor Project, Inc. */
|
||||
/* See LICENSE for licensing information */
|
||||
|
||||
/**
|
||||
* \file congestion_control_common.c
|
||||
* \brief Common code used by all congestion control algorithms.
|
||||
*/
|
||||
|
||||
#define TOR_CONGESTION_CONTROL_COMMON_PRIVATE
|
||||
|
||||
#include "core/or/or.h"
|
||||
|
||||
#include "core/or/circuitlist.h"
|
||||
#include "core/or/crypt_path.h"
|
||||
#include "core/or/or_circuit_st.h"
|
||||
#include "core/or/origin_circuit_st.h"
|
||||
#include "core/or/channel.h"
|
||||
#include "core/mainloop/connection.h"
|
||||
#include "core/or/sendme.h"
|
||||
#include "core/or/congestion_control_common.h"
|
||||
#include "core/or/congestion_control_vegas.h"
|
||||
#include "core/or/congestion_control_nola.h"
|
||||
#include "core/or/congestion_control_westwood.h"
|
||||
#include "core/or/congestion_control_st.h"
|
||||
#include "lib/time/compat_time.h"
|
||||
#include "feature/nodelist/networkstatus.h"
|
||||
|
||||
/* Consensus parameter defaults */

/** Default for "cc_cwnd_init": the congestion window a circuit starts with. */
#define CIRCWINDOW_INIT (500)

/** Default for "cc_cwnd_inc_pct_ss". */
#define CWND_INC_PCT_SS_DFLT (100)

/** Default number of packaged cells acked by one sendme. */
#define SENDME_INC_DFLT (50)
/** Default for "cc_cwnd_min"; never below the default sendme increment. */
#define CWND_MIN_DFLT (MAX(100, SENDME_INC_DFLT))

/** Default for "cc_cwnd_inc". */
#define CWND_INC_DFLT (50)

/** Default for "cc_cwnd_inc_rate". */
#define CWND_INC_RATE_DFLT (1)

/* Default "cc_bdp_alg" value for each congestion control algorithm. */
#define WESTWOOD_BDP_ALG BDP_ALG_PIECEWISE
#define VEGAS_BDP_MIX_ALG BDP_ALG_PIECEWISE
#define NOLA_BDP_ALG BDP_ALG_PIECEWISE

/** Default for "cc_ewma_cwnd_cnt". */
#define EWMA_CWND_COUNT_DFLT 2

/** Default for "cc_bwe_min": sendme arrivals needed before the
 * sendme-rate BDP estimate is computed. */
#define BWE_SENDME_MIN_DFLT 5
|
||||
|
||||
static uint64_t congestion_control_update_circuit_rtt(congestion_control_t *,
|
||||
uint64_t);
|
||||
static bool congestion_control_update_circuit_bdp(congestion_control_t *,
|
||||
const circuit_t *,
|
||||
const crypt_path_t *,
|
||||
uint64_t, uint64_t);
|
||||
|
||||
/**
|
||||
* Set congestion control parameters on a circuit's congestion
|
||||
* control object based on values from the consensus.
|
||||
*
|
||||
* cc_alg is the negotiated congestion control algorithm.
|
||||
*
|
||||
* sendme_inc is the number of packaged cells that a sendme cell
|
||||
* acks. This parameter will come from circuit negotiation.
|
||||
*/
|
||||
static void
|
||||
congestion_control_init_params(congestion_control_t *cc,
|
||||
cc_alg_t cc_alg,
|
||||
int sendme_inc)
|
||||
{
|
||||
#define CWND_INIT_MIN 100
|
||||
#define CWND_INIT_MAX (10000)
|
||||
cc->cwnd =
|
||||
networkstatus_get_param(NULL, "cc_cwnd_init",
|
||||
CIRCWINDOW_INIT,
|
||||
CWND_INIT_MIN,
|
||||
CWND_INIT_MAX);
|
||||
|
||||
#define CWND_INC_PCT_SS_MIN 1
|
||||
#define CWND_INC_PCT_SS_MAX (500)
|
||||
cc->cwnd_inc_pct_ss =
|
||||
networkstatus_get_param(NULL, "cc_cwnd_inc_pct_ss",
|
||||
CWND_INC_PCT_SS_DFLT,
|
||||
CWND_INC_PCT_SS_MIN,
|
||||
CWND_INC_PCT_SS_MAX);
|
||||
|
||||
#define CWND_INC_MIN 1
|
||||
#define CWND_INC_MAX (1000)
|
||||
cc->cwnd_inc =
|
||||
networkstatus_get_param(NULL, "cc_cwnd_inc",
|
||||
CWND_INC_DFLT,
|
||||
CWND_INC_MIN,
|
||||
CWND_INC_MAX);
|
||||
|
||||
#define CWND_INC_RATE_MIN 1
|
||||
#define CWND_INC_RATE_MAX (250)
|
||||
cc->cwnd_inc_rate =
|
||||
networkstatus_get_param(NULL, "cc_cwnd_inc_rate",
|
||||
CWND_INC_RATE_DFLT,
|
||||
CWND_INC_RATE_MIN,
|
||||
CWND_INC_RATE_MAX);
|
||||
|
||||
#define SENDME_INC_MIN 10
|
||||
#define SENDME_INC_MAX (1000)
|
||||
cc->sendme_inc =
|
||||
networkstatus_get_param(NULL, "cc_sendme_inc",
|
||||
sendme_inc,
|
||||
SENDME_INC_MIN,
|
||||
SENDME_INC_MAX);
|
||||
|
||||
// XXX: this min needs to abide by sendme_inc range rules somehow
|
||||
#define CWND_MIN_MIN sendme_inc
|
||||
#define CWND_MIN_MAX (1000)
|
||||
cc->cwnd_min =
|
||||
networkstatus_get_param(NULL, "cc_cwnd_min",
|
||||
CWND_MIN_DFLT,
|
||||
CWND_MIN_MIN,
|
||||
CWND_MIN_MAX);
|
||||
|
||||
#define EWMA_CWND_COUNT_MIN 1
|
||||
#define EWMA_CWND_COUNT_MAX (100)
|
||||
cc->ewma_cwnd_cnt =
|
||||
networkstatus_get_param(NULL, "cc_ewma_cwnd_cnt",
|
||||
EWMA_CWND_COUNT_DFLT,
|
||||
EWMA_CWND_COUNT_MIN,
|
||||
EWMA_CWND_COUNT_MAX);
|
||||
|
||||
#define BWE_SENDME_MIN_MIN 2
|
||||
#define BWE_SENDME_MIN_MAX (20)
|
||||
cc->bwe_sendme_min =
|
||||
networkstatus_get_param(NULL, "cc_bwe_min",
|
||||
BWE_SENDME_MIN_DFLT,
|
||||
BWE_SENDME_MIN_MIN,
|
||||
BWE_SENDME_MIN_MAX);
|
||||
|
||||
#define CC_ALG_MIN 0
|
||||
#define CC_ALG_MAX (NUM_CC_ALGS-1)
|
||||
cc->cc_alg =
|
||||
networkstatus_get_param(NULL, "cc_alg",
|
||||
cc_alg,
|
||||
CC_ALG_MIN,
|
||||
CC_ALG_MAX);
|
||||
|
||||
bdp_alg_t default_bdp_alg = 0;
|
||||
|
||||
switch (cc->cc_alg) {
|
||||
case CC_ALG_WESTWOOD:
|
||||
default_bdp_alg = WESTWOOD_BDP_ALG;
|
||||
break;
|
||||
case CC_ALG_VEGAS:
|
||||
default_bdp_alg = VEGAS_BDP_MIX_ALG;
|
||||
break;
|
||||
case CC_ALG_NOLA:
|
||||
default_bdp_alg = NOLA_BDP_ALG;
|
||||
break;
|
||||
case CC_ALG_SENDME:
|
||||
default:
|
||||
tor_fragile_assert();
|
||||
return; // No alg-specific params
|
||||
}
|
||||
|
||||
cc->bdp_alg =
|
||||
networkstatus_get_param(NULL, "cc_bdp_alg",
|
||||
default_bdp_alg,
|
||||
0,
|
||||
NUM_BDP_ALGS-1);
|
||||
|
||||
/* Algorithm-specific parameters */
|
||||
if (cc->cc_alg == CC_ALG_WESTWOOD) {
|
||||
congestion_control_westwood_set_params(cc);
|
||||
} else if (cc->cc_alg == CC_ALG_VEGAS) {
|
||||
congestion_control_vegas_set_params(cc);
|
||||
} else if (cc->cc_alg == CC_ALG_NOLA) {
|
||||
congestion_control_nola_set_params(cc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate and initialize fields in congestion control object.
|
||||
*
|
||||
* cc_alg is the negotiated congestion control algorithm.
|
||||
*
|
||||
* sendme_inc is the number of packaged cells that a sendme cell
|
||||
* acks. This parameter will come from circuit negotiation.
|
||||
*/
|
||||
static void
|
||||
congestion_control_init(congestion_control_t *cc, cc_alg_t cc_alg,
|
||||
int sendme_inc)
|
||||
{
|
||||
cc->sendme_pending_timestamps = smartlist_new();
|
||||
cc->sendme_arrival_timestamps = smartlist_new();
|
||||
|
||||
cc->in_slow_start = 1;
|
||||
congestion_control_init_params(cc, cc_alg, sendme_inc);
|
||||
|
||||
cc->next_cc_event = CWND_UPDATE_RATE(cc);
|
||||
}
|
||||
|
||||
/** Allocate and initialize a new congestion control object */
|
||||
congestion_control_t *
|
||||
congestion_control_new(void)
|
||||
{
|
||||
congestion_control_t *cc = tor_malloc_zero(sizeof(congestion_control_t));
|
||||
|
||||
// XXX: the alg and the sendme_inc need to be negotiated during
|
||||
// circuit handshake
|
||||
congestion_control_init(cc, CC_ALG_VEGAS, SENDME_INC_DFLT);
|
||||
|
||||
return cc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Free a congestion control object and its asssociated state.
|
||||
*/
|
||||
void
|
||||
congestion_control_free_(congestion_control_t *cc)
|
||||
{
|
||||
if (!cc)
|
||||
return;
|
||||
|
||||
SMARTLIST_FOREACH(cc->sendme_pending_timestamps, uint64_t *, t, tor_free(t));
|
||||
SMARTLIST_FOREACH(cc->sendme_arrival_timestamps, uint64_t *, t, tor_free(t));
|
||||
smartlist_free(cc->sendme_pending_timestamps);
|
||||
smartlist_free(cc->sendme_arrival_timestamps);
|
||||
|
||||
tor_free(cc);
|
||||
}
|
||||
|
||||
/**
 * Compute an N-count EWMA, aka N-EWMA. N-EWMA is defined as:
 *  EWMA = alpha*value + (1-alpha)*EWMA_prev
 * with alpha = 2/(N+1).
 *
 * This works out to:
 *  EWMA = value*2/(N+1) + EWMA_prev*(N-1)/(N+1)
 *       = (value*2 + EWMA_prev*(N-1))/(N+1)
 *
 * <b>curr</b> is the new sample, <b>prev</b> is the previous average and
 * <b>N</b> is the EWMA count.
 *
 * If <b>prev</b> is 0 there is no history yet, so <b>curr</b> is returned
 * unchanged. If <b>N</b> is 0, <b>curr</b> is returned as well: that is
 * the same result as N == 1 (alpha == 1, no smoothing).
 */
static inline uint64_t
n_count_ewma(uint64_t curr, uint64_t prev, uint64_t N)
{
  if (prev == 0)
    return curr;

  /* With N == 0 the unsigned (N-1) below would wrap to UINT64_MAX and
   * produce a meaningless average. */
  if (N == 0)
    return curr;

  return (2*curr + (N-1)*prev)/(N+1);
}
|
||||
|
||||
/**
|
||||
* Enqueue a u64 timestamp to the end of a queue of timestamps.
|
||||
*/
|
||||
static inline void
|
||||
enqueue_timestamp(smartlist_t *timestamps_u64, uint64_t timestamp_usec)
|
||||
{
|
||||
uint64_t *timestamp_ptr = tor_malloc(sizeof(uint64_t));
|
||||
*timestamp_ptr = timestamp_usec;
|
||||
|
||||
smartlist_add(timestamps_u64, timestamp_ptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Peek at the head of a smartlist queue of u64 timestamps.
|
||||
*/
|
||||
static inline uint64_t
|
||||
peek_timestamp(const smartlist_t *timestamps_u64_usecs)
|
||||
{
|
||||
uint64_t *timestamp_ptr = smartlist_get(timestamps_u64_usecs, 0);
|
||||
|
||||
if (BUG(!timestamp_ptr)) {
|
||||
log_err(LD_CIRC, "Congestion control timestamp list became empty!");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return *timestamp_ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Dequeue a u64 monotime usec timestamp from the front of a
|
||||
* smartlist of pointers to 64.
|
||||
*/
|
||||
static inline uint64_t
|
||||
dequeue_timestamp(smartlist_t *timestamps_u64_usecs)
|
||||
{
|
||||
uint64_t *timestamp_ptr = smartlist_get(timestamps_u64_usecs, 0);
|
||||
uint64_t timestamp_u64;
|
||||
|
||||
if (BUG(!timestamp_ptr)) {
|
||||
log_err(LD_CIRC, "Congestion control timestamp list became empty!");
|
||||
return 0;
|
||||
}
|
||||
|
||||
timestamp_u64 = *timestamp_ptr;
|
||||
smartlist_del_keeporder(timestamps_u64_usecs, 0);
|
||||
tor_free(timestamp_ptr);
|
||||
|
||||
return timestamp_u64;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of sendme acks that will be recieved in the
|
||||
* current congestion window size, rounded to nearest int.
|
||||
*/
|
||||
static inline uint64_t
|
||||
sendme_acks_per_cwnd(const congestion_control_t *cc)
|
||||
{
|
||||
/* We add half a sendme_inc to cwnd to round to the nearest int */
|
||||
return ((cc->cwnd + cc->sendme_inc/2)/cc->sendme_inc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a package window from either old sendme logic, or congestion control.
|
||||
*
|
||||
* A package window is how many cells you can still send.
|
||||
*/
|
||||
int
|
||||
congestion_control_get_package_window(const circuit_t *circ,
|
||||
const crypt_path_t *cpath)
|
||||
{
|
||||
int package_window;
|
||||
congestion_control_t *cc;
|
||||
|
||||
tor_assert(circ);
|
||||
|
||||
if (cpath) {
|
||||
package_window = cpath->package_window;
|
||||
cc = cpath->ccontrol;
|
||||
} else {
|
||||
package_window = circ->package_window;
|
||||
cc = circ->ccontrol;
|
||||
}
|
||||
|
||||
if (!cc) {
|
||||
return package_window;
|
||||
} else {
|
||||
/* Inflight can be above cwnd if cwnd was just reduced */
|
||||
if (cc->inflight > cc->cwnd)
|
||||
return 0;
|
||||
/* In the extremely unlikely event that cwnd-inflight is larger than
|
||||
* INT32_MAX, just return that cap, so old code doesn't explode. */
|
||||
else if (cc->cwnd - cc->inflight > INT32_MAX)
|
||||
return INT32_MAX;
|
||||
else
|
||||
return (int)(cc->cwnd - cc->inflight);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of cells that are acked by every sendme.
|
||||
*/
|
||||
int
|
||||
sendme_get_inc_count(const circuit_t *circ, const crypt_path_t *layer_hint)
|
||||
{
|
||||
int sendme_inc = CIRCWINDOW_INCREMENT;
|
||||
congestion_control_t *cc = NULL;
|
||||
|
||||
if (layer_hint) {
|
||||
cc = layer_hint->ccontrol;
|
||||
} else {
|
||||
cc = circ->ccontrol;
|
||||
}
|
||||
|
||||
if (cc) {
|
||||
sendme_inc = cc->sendme_inc;
|
||||
}
|
||||
|
||||
return sendme_inc;
|
||||
}
|
||||
|
||||
/** Return true iff the next cell we send will result in the other endpoint
|
||||
* sending a SENDME.
|
||||
*
|
||||
* We are able to know that because the package or inflight window value minus
|
||||
* one cell (the possible SENDME cell) should be a multiple of the
|
||||
* cells-per-sendme increment value (set via consensus parameter, negotiated
|
||||
* for the circuit, and passed in as sendme_inc).
|
||||
*
|
||||
* This function is used when recording a cell digest and this is done quite
|
||||
* low in the stack when decrypting or encrypting a cell. The window is only
|
||||
* updated once the cell is actually put in the outbuf.
|
||||
*/
|
||||
bool
|
||||
circuit_sent_cell_for_sendme(const circuit_t *circ,
|
||||
const crypt_path_t *layer_hint)
|
||||
{
|
||||
congestion_control_t *cc;
|
||||
int window;
|
||||
|
||||
tor_assert(circ);
|
||||
|
||||
if (layer_hint) {
|
||||
window = layer_hint->package_window;
|
||||
cc = layer_hint->ccontrol;
|
||||
} else {
|
||||
window = circ->package_window;
|
||||
cc = circ->ccontrol;
|
||||
}
|
||||
|
||||
/* If we are using congestion control and the alg is not
|
||||
* old-school 'fixed', then use cc->inflight to determine
|
||||
* when sendmes will be sent */
|
||||
if (cc) {
|
||||
if (!cc->inflight)
|
||||
return false;
|
||||
|
||||
/* This check must be +1 because this function is called *before*
|
||||
* inflight is incremented for the sent cell */
|
||||
if ((cc->inflight+1) % cc->sendme_inc != 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* At the start of the window, no SENDME will be expected. */
|
||||
if (window == CIRCWINDOW_START) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Are we at the limit of the increment and if not, we don't expect next
|
||||
* cell is a SENDME.
|
||||
*
|
||||
* We test against the window minus 1 because when we are looking if the
|
||||
* next cell is a SENDME, the window (either package or deliver) hasn't been
|
||||
* decremented just yet so when this is called, we are currently processing
|
||||
* the "window - 1" cell.
|
||||
*/
|
||||
if (((window - 1) % CIRCWINDOW_INCREMENT) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Next cell is expected to be a SENDME. */
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Call-in to tell congestion control code that this circuit sent a cell.
|
||||
*
|
||||
* This updates the 'inflight' counter, and if this is a cell that will
|
||||
* cause the other end to send a SENDME, record the current time in a list
|
||||
* of pending timestamps, so that we can later compute the circuit RTT when
|
||||
* the SENDME comes back. */
|
||||
void
|
||||
congestion_control_note_cell_sent(congestion_control_t *cc,
|
||||
const circuit_t *circ,
|
||||
const crypt_path_t *cpath)
|
||||
{
|
||||
tor_assert(circ);
|
||||
tor_assert(cc);
|
||||
|
||||
/* Is this the last cell before a SENDME? The idea is that if the
|
||||
* package_window reaches a multiple of the increment, after this cell, we
|
||||
* should expect a SENDME. Note that this function must be called *before*
|
||||
* we account for the sent cell. */
|
||||
if (!circuit_sent_cell_for_sendme(circ, cpath)) {
|
||||
cc->inflight++;
|
||||
return;
|
||||
}
|
||||
|
||||
cc->inflight++;
|
||||
|
||||
/* Record this cell time for RTT computation when SENDME arrives */
|
||||
enqueue_timestamp(cc->sendme_pending_timestamps,
|
||||
monotime_absolute_usec());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if any edge connections are active.
|
||||
*
|
||||
* We need to know this so that we can stop computing BDP if the
|
||||
* edges are not sending on the circuit.
|
||||
*/
|
||||
static int
|
||||
circuit_has_active_streams(const circuit_t *circ,
|
||||
const crypt_path_t *layer_hint)
|
||||
{
|
||||
const edge_connection_t *streams;
|
||||
|
||||
if (CIRCUIT_IS_ORIGIN(circ)) {
|
||||
streams = CONST_TO_ORIGIN_CIRCUIT(circ)->p_streams;
|
||||
} else {
|
||||
streams = CONST_TO_OR_CIRCUIT(circ)->n_streams;
|
||||
}
|
||||
|
||||
/* Check linked list of streams */
|
||||
for (const edge_connection_t *conn = streams; conn != NULL;
|
||||
conn = conn->next_stream) {
|
||||
if (conn->base_.marked_for_close)
|
||||
continue;
|
||||
|
||||
if (!layer_hint || conn->cpath_layer == layer_hint) {
|
||||
if (connection_get_inbuf_len(TO_CONN(conn)) > 0) {
|
||||
log_info(LD_CIRC, "CC: More in edge inbuf...");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* If we did not reach EOF on this read, there's more */
|
||||
if (!TO_CONN(conn)->inbuf_reached_eof) {
|
||||
log_info(LD_CIRC, "CC: More on edge conn...");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (TO_CONN(conn)->linked_conn) {
|
||||
if (connection_get_inbuf_len(TO_CONN(conn)->linked_conn) > 0) {
|
||||
log_info(LD_CIRC, "CC: More in linked inbuf...");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* If there is a linked conn, and *it* did not each EOF,
|
||||
* there's more */
|
||||
if (!TO_CONN(conn)->linked_conn->inbuf_reached_eof) {
|
||||
log_info(LD_CIRC, "CC: More on linked conn...");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Upon receipt of a SENDME, pop the oldest timestamp off the timestamp
|
||||
* list, and use this to update RTT.
|
||||
*
|
||||
* Returns true if circuit estimates were successfully updated, false
|
||||
* otherwise.
|
||||
*/
|
||||
bool
|
||||
congestion_control_update_circuit_estimates(congestion_control_t *cc,
|
||||
const circuit_t *circ,
|
||||
const crypt_path_t *layer_hint)
|
||||
{
|
||||
uint64_t now_usec = monotime_absolute_usec();
|
||||
|
||||
/* Update RTT first, then BDP. BDP needs fresh RTT */
|
||||
uint64_t curr_rtt_usec = congestion_control_update_circuit_rtt(cc, now_usec);
|
||||
return congestion_control_update_circuit_bdp(cc, circ, layer_hint, now_usec,
|
||||
curr_rtt_usec);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if we have enough time data to use heuristics
|
||||
* to compare RTT to a baseline.
|
||||
*/
|
||||
static bool
|
||||
time_delta_should_use_heuristics(const congestion_control_t *cc)
|
||||
{
|
||||
|
||||
/* If we have exited slow start, we should have processed at least
|
||||
* a cwnd worth of RTTs */
|
||||
if (!cc->in_slow_start) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* If we managed to get enough acks to estimate a SENDME BDP, then
|
||||
* we have enough to estimate clock jumps relative to a baseline,
|
||||
* too. (This is at least 'cc_bwe_min' acks). */
|
||||
if (cc->bdp[BDP_ALG_SENDME_RATE]) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Not enough data to estimate clock jumps */
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the monotime delta is 0, or is significantly
|
||||
* different than the previous delta. Either case indicates
|
||||
* that the monotime time source stalled or jumped.
|
||||
*/
|
||||
static bool
|
||||
time_delta_stalled_or_jumped(const congestion_control_t *cc,
|
||||
uint64_t old_delta, uint64_t new_delta)
|
||||
{
|
||||
#define DELTA_DISCREPENCY_RATIO_MAX 100
|
||||
/* If we have a 0 new_delta, that is definitely a monotime stall */
|
||||
if (new_delta == 0) {
|
||||
static ratelim_t stall_info_limit = RATELIM_INIT(60);
|
||||
log_fn_ratelim(&stall_info_limit, LOG_INFO, LD_CIRC,
|
||||
"Congestion control cannot measure RTT due to monotime stall.");
|
||||
return true;
|
||||
}
|
||||
|
||||
/* If the old_delta is 0, we have no previous values. So
|
||||
* just assume this one is valid (beause it is non-zero) */
|
||||
if (old_delta == 0)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* For the heuristic cases, we need at least a few timestamps,
|
||||
* to average out any previous partial stalls or jumps. So until
|
||||
* than point, let's just delcare these time values "good enough
|
||||
* to use".
|
||||
*/
|
||||
if (!time_delta_should_use_heuristics(cc)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If old_delta is significantly larger than new_delta, then
|
||||
* this means that the monotime clock recently stopped moving
|
||||
* forward. */
|
||||
if (old_delta > new_delta * DELTA_DISCREPENCY_RATIO_MAX) {
|
||||
static ratelim_t dec_notice_limit = RATELIM_INIT(300);
|
||||
log_fn_ratelim(&dec_notice_limit, LOG_NOTICE, LD_CIRC,
|
||||
"Sudden decrease in circuit RTT (%"PRIu64" vs %"PRIu64
|
||||
"), likely due to clock jump.",
|
||||
new_delta/1000, old_delta/1000);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* If new_delta is significantly larger than old_delta, then
|
||||
* this means that the monotime clock suddenly jumped forward. */
|
||||
if (new_delta > old_delta * DELTA_DISCREPENCY_RATIO_MAX) {
|
||||
static ratelim_t dec_notice_limit = RATELIM_INIT(300);
|
||||
log_fn_ratelim(&dec_notice_limit, LOG_NOTICE, LD_CIRC,
|
||||
"Sudden increase in circuit RTT (%"PRIu64" vs %"PRIu64
|
||||
"), likely due to clock jump.",
|
||||
new_delta/1000, old_delta/1000);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when we get a SENDME. Updates circuit RTT by pulling off a
|
||||
* timestamp of when we sent the CIRCWINDOW_INCREMENT-th cell from
|
||||
* the queue of such timestamps, and comparing that to current time.
|
||||
*
|
||||
* Also updates min, max, and EWMA of RTT.
|
||||
*
|
||||
* Returns the current circuit RTT in usecs, or 0 if it could not be
|
||||
* measured (due to clock jump, stall, etc).
|
||||
*/
|
||||
static uint64_t
|
||||
congestion_control_update_circuit_rtt(congestion_control_t *cc,
|
||||
uint64_t now_usec)
|
||||
{
|
||||
uint64_t rtt, ewma_cnt;
|
||||
uint64_t sent_at_timestamp;
|
||||
|
||||
tor_assert(cc);
|
||||
|
||||
/* Get the time that we sent the cell that resulted in the other
|
||||
* end sending this sendme. Use this to calculate RTT */
|
||||
sent_at_timestamp = dequeue_timestamp(cc->sendme_pending_timestamps);
|
||||
|
||||
rtt = now_usec - sent_at_timestamp;
|
||||
|
||||
/* Do not update RTT at all if it looks fishy */
|
||||
if (time_delta_stalled_or_jumped(cc, cc->ewma_rtt_usec, rtt)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
ewma_cnt = cc->ewma_cwnd_cnt*sendme_acks_per_cwnd(cc);
|
||||
ewma_cnt = MAX(ewma_cnt, 2); // Use at least 2
|
||||
|
||||
cc->ewma_rtt_usec = n_count_ewma(rtt, cc->ewma_rtt_usec, ewma_cnt);
|
||||
|
||||
if (rtt > cc->max_rtt_usec) {
|
||||
cc->max_rtt_usec = rtt;
|
||||
}
|
||||
|
||||
if (cc->min_rtt_usec == 0 || rtt < cc->min_rtt_usec) {
|
||||
cc->min_rtt_usec = rtt;
|
||||
}
|
||||
|
||||
return rtt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when we get a SENDME. Updates the bandwidth-delay-product (BDP)
|
||||
* estimates of a circuit. Several methods of computing BDP are used,
|
||||
* depending on scenario. While some congestion control algorithms only
|
||||
* use one of these methods, we update them all because it's quick and easy.
|
||||
*
|
||||
* - now_usec is the current monotime in usecs.
|
||||
* - curr_rtt_usec is the current circuit RTT in usecs. It may be 0 if no
|
||||
* RTT could bemeasured.
|
||||
*
|
||||
* Returns true if we were able to update BDP, false otherwise.
|
||||
*/
|
||||
static bool
|
||||
congestion_control_update_circuit_bdp(congestion_control_t *cc,
|
||||
const circuit_t *circ,
|
||||
const crypt_path_t *layer_hint,
|
||||
uint64_t now_usec,
|
||||
uint64_t curr_rtt_usec)
|
||||
{
|
||||
int chan_q = 0;
|
||||
unsigned int blocked_on_chan = 0;
|
||||
uint64_t timestamp_usec;
|
||||
uint64_t sendme_rate_bdp = 0;
|
||||
|
||||
tor_assert(cc);
|
||||
|
||||
if (CIRCUIT_IS_ORIGIN(circ)) {
|
||||
/* origin circs use n_chan */
|
||||
chan_q = circ->n_chan_cells.n;
|
||||
blocked_on_chan = circ->streams_blocked_on_n_chan;
|
||||
} else {
|
||||
/* Both onion services and exits use or_circuit and p_chan */
|
||||
chan_q = CONST_TO_OR_CIRCUIT(circ)->p_chan_cells.n;
|
||||
blocked_on_chan = circ->streams_blocked_on_p_chan;
|
||||
}
|
||||
|
||||
/* If we have no EWMA RTT, it is because monotime has been stalled
|
||||
* or messed up the entire time so far. Set our BDP estimates directly
|
||||
* to current cwnd */
|
||||
if (!cc->ewma_rtt_usec) {
|
||||
uint64_t cwnd = cc->cwnd;
|
||||
|
||||
/* If the channel is blocked, keep subtracting off the chan_q
|
||||
* until we hit the min cwnd. */
|
||||
if (blocked_on_chan) {
|
||||
cwnd = MAX(cwnd - chan_q, cc->cwnd_min);
|
||||
cc->blocked_chan = 1;
|
||||
} else {
|
||||
cc->blocked_chan = 0;
|
||||
}
|
||||
|
||||
cc->bdp[BDP_ALG_CWND_RTT] = cwnd;
|
||||
cc->bdp[BDP_ALG_INFLIGHT_RTT] = cwnd;
|
||||
cc->bdp[BDP_ALG_SENDME_RATE] = cwnd;
|
||||
cc->bdp[BDP_ALG_PIECEWISE] = cwnd;
|
||||
|
||||
static ratelim_t dec_notice_limit = RATELIM_INIT(300);
|
||||
log_fn_ratelim(&dec_notice_limit, LOG_NOTICE, LD_CIRC,
|
||||
"Our clock has been stalled for the entire lifetime of a circuit. "
|
||||
"Performance may be sub-optimal.");
|
||||
|
||||
return blocked_on_chan;
|
||||
}
|
||||
|
||||
/* Congestion window based BDP will respond to changes in RTT only, and is
|
||||
* relative to cwnd growth. It is useful for correcting for BDP
|
||||
* overestimation, but if BDP is higher than the current cwnd, it will
|
||||
* underestimate it.
|
||||
*
|
||||
* We multiply here first to avoid precision issues from min_RTT being
|
||||
* close to ewma RTT. Since all fields are u64, there is plenty of
|
||||
* room here to multiply first.
|
||||
*/
|
||||
cc->bdp[BDP_ALG_CWND_RTT] = cc->cwnd*cc->min_rtt_usec/cc->ewma_rtt_usec;
|
||||
|
||||
/*
|
||||
* If we have no pending streams, we do not have enough data to fill
|
||||
* the BDP, so preserve our old estimates but do not make any more.
|
||||
*/
|
||||
if (!blocked_on_chan && !circuit_has_active_streams(circ, layer_hint)) {
|
||||
log_info(LD_CIRC,
|
||||
"CC: Streams drained. Spare package window: %"PRIu64
|
||||
", no BDP update", cc->cwnd - cc->inflight);
|
||||
|
||||
/* Clear SENDME timestamps; they will be wrong with intermittent data */
|
||||
SMARTLIST_FOREACH(cc->sendme_arrival_timestamps, uint64_t *, t,
|
||||
tor_free(t));
|
||||
smartlist_clear(cc->sendme_arrival_timestamps);
|
||||
} else if (curr_rtt_usec) {
|
||||
/* Sendme-based BDP will quickly measure BDP in much less than
|
||||
* a cwnd worth of data when in use (in 2-10 SENDMEs).
|
||||
*
|
||||
* But if the link goes idle, it will be vastly lower than true BDP. Hence
|
||||
* we only compute it if we have either pending stream data, or streams
|
||||
* are still blocked on the channel queued data.
|
||||
*
|
||||
* We also do not compute it if we do not have a current RTT passed in,
|
||||
* because that means that monotime is currently stalled or just jumped.
|
||||
*/
|
||||
enqueue_timestamp(cc->sendme_arrival_timestamps, now_usec);
|
||||
|
||||
if (smartlist_len(cc->sendme_arrival_timestamps) >= cc->bwe_sendme_min) {
|
||||
/* If we have more sendmes than fit in a cwnd, trim the list.
|
||||
* Those are not acurrately measuring throughput, if cwnd is
|
||||
* currently smaller than BDP */
|
||||
while (smartlist_len(cc->sendme_arrival_timestamps) >
|
||||
cc->bwe_sendme_min &&
|
||||
(uint64_t)smartlist_len(cc->sendme_arrival_timestamps) >
|
||||
sendme_acks_per_cwnd(cc)) {
|
||||
(void)dequeue_timestamp(cc->sendme_arrival_timestamps);
|
||||
}
|
||||
int sendme_cnt = smartlist_len(cc->sendme_arrival_timestamps);
|
||||
|
||||
/* Calculate SENDME_BWE_COUNT pure average */
|
||||
timestamp_usec = peek_timestamp(cc->sendme_arrival_timestamps);
|
||||
uint64_t delta = now_usec - timestamp_usec;
|
||||
|
||||
/* The acked data is in sendme_cnt-1 chunks, because we are counting the
|
||||
* data that is processed by the other endpoint *between* all of these
|
||||
* sendmes. There's one less gap between the sendmes than the number
|
||||
* of sendmes. */
|
||||
uint64_t cells = (sendme_cnt-1)*cc->sendme_inc;
|
||||
|
||||
/* The bandwidth estimate is cells/delta, which when multiplied
|
||||
* by min RTT obtains the BDP. However, we multiply first to
|
||||
* avoid precision issues with the RTT being close to delta in size. */
|
||||
sendme_rate_bdp = cells*cc->min_rtt_usec/delta;
|
||||
|
||||
/* Calculate BDP_EWMA_COUNT N-EWMA */
|
||||
cc->bdp[BDP_ALG_SENDME_RATE] =
|
||||
n_count_ewma(sendme_rate_bdp, cc->bdp[BDP_ALG_SENDME_RATE],
|
||||
cc->ewma_cwnd_cnt*sendme_acks_per_cwnd(cc));
|
||||
}
|
||||
|
||||
/* In-flight BDP will cause the cwnd to drift down when underutilized.
|
||||
* It is most useful when the local OR conn is blocked, so we only
|
||||
* compute it if we're utilized. */
|
||||
cc->bdp[BDP_ALG_INFLIGHT_RTT] =
|
||||
(cc->inflight - chan_q)*cc->min_rtt_usec/
|
||||
MAX(cc->ewma_rtt_usec, curr_rtt_usec);
|
||||
} else {
|
||||
/* We can still update inflight with just an EWMA RTT, but only
|
||||
* if there is data flowing */
|
||||
cc->bdp[BDP_ALG_INFLIGHT_RTT] =
|
||||
(cc->inflight - chan_q)*cc->min_rtt_usec/cc->ewma_rtt_usec;
|
||||
}
|
||||
|
||||
/* The orconn is blocked; use smaller of inflight vs SENDME */
|
||||
if (blocked_on_chan) {
|
||||
log_info(LD_CIRC, "CC: Streams blocked on circ channel. Chanq: %d",
|
||||
chan_q);
|
||||
|
||||
/* A blocked channel is an immediate congestion signal, but it still
|
||||
* happens only once per cwnd */
|
||||
if (!cc->blocked_chan) {
|
||||
cc->next_cc_event = 0;
|
||||
cc->blocked_chan = 1;
|
||||
}
|
||||
|
||||
if (cc->bdp[BDP_ALG_SENDME_RATE]) {
|
||||
cc->bdp[BDP_ALG_PIECEWISE] = MIN(cc->bdp[BDP_ALG_INFLIGHT_RTT],
|
||||
cc->bdp[BDP_ALG_SENDME_RATE]);
|
||||
} else {
|
||||
cc->bdp[BDP_ALG_PIECEWISE] = cc->bdp[BDP_ALG_INFLIGHT_RTT];
|
||||
}
|
||||
} else {
|
||||
/* If we were previously blocked, emit a new congestion event
|
||||
* now that we are unblocked, to re-evaluate cwnd */
|
||||
if (cc->blocked_chan) {
|
||||
cc->blocked_chan = 0;
|
||||
cc->next_cc_event = 0;
|
||||
log_info(LD_CIRC, "CC: Streams un-blocked on circ channel. Chanq: %d",
|
||||
chan_q);
|
||||
}
|
||||
|
||||
cc->bdp[BDP_ALG_PIECEWISE] = MAX(cc->bdp[BDP_ALG_SENDME_RATE],
|
||||
cc->bdp[BDP_ALG_CWND_RTT]);
|
||||
}
|
||||
|
||||
/* We can end up with no piecewise value if we didn't have either
|
||||
* a SENDME estimate or enough data for an inflight estimate.
|
||||
* It also happens on the very first sendme, since we need two
|
||||
* to get a BDP. In these cases, use the cwnd method. */
|
||||
if (!cc->bdp[BDP_ALG_PIECEWISE]) {
|
||||
cc->bdp[BDP_ALG_PIECEWISE] = cc->bdp[BDP_ALG_CWND_RTT];
|
||||
log_info(LD_CIRC, "CC: No piecewise BDP. Using %"PRIu64,
|
||||
cc->bdp[BDP_ALG_PIECEWISE]);
|
||||
}
|
||||
|
||||
if (cc->next_cc_event == 0) {
|
||||
if (CIRCUIT_IS_ORIGIN(circ)) {
|
||||
log_info(LD_CIRC,
|
||||
"CC: Circuit %d "
|
||||
"SENDME RTT: %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", "
|
||||
"BDP estimates: "
|
||||
"%"PRIu64", "
|
||||
"%"PRIu64", "
|
||||
"%"PRIu64", "
|
||||
"%"PRIu64", "
|
||||
"%"PRIu64". ",
|
||||
CONST_TO_ORIGIN_CIRCUIT(circ)->global_identifier,
|
||||
cc->min_rtt_usec/1000,
|
||||
curr_rtt_usec/1000,
|
||||
cc->ewma_rtt_usec/1000,
|
||||
cc->max_rtt_usec/1000,
|
||||
cc->bdp[BDP_ALG_INFLIGHT_RTT],
|
||||
cc->bdp[BDP_ALG_CWND_RTT],
|
||||
sendme_rate_bdp,
|
||||
cc->bdp[BDP_ALG_SENDME_RATE],
|
||||
cc->bdp[BDP_ALG_PIECEWISE]
|
||||
);
|
||||
} else {
|
||||
log_info(LD_CIRC,
|
||||
"CC: Circuit %"PRIu64":%d "
|
||||
"SENDME RTT: %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", "
|
||||
"%"PRIu64", "
|
||||
"%"PRIu64", "
|
||||
"%"PRIu64", "
|
||||
"%"PRIu64", "
|
||||
"%"PRIu64". ",
|
||||
// XXX: actually, is this p_chan here? This is
|
||||
// an or_circuit (exit or onion)
|
||||
circ->n_chan->global_identifier, circ->n_circ_id,
|
||||
cc->min_rtt_usec/1000,
|
||||
curr_rtt_usec/1000,
|
||||
cc->ewma_rtt_usec/1000,
|
||||
cc->max_rtt_usec/1000,
|
||||
cc->bdp[BDP_ALG_INFLIGHT_RTT],
|
||||
cc->bdp[BDP_ALG_CWND_RTT],
|
||||
sendme_rate_bdp,
|
||||
cc->bdp[BDP_ALG_SENDME_RATE],
|
||||
cc->bdp[BDP_ALG_PIECEWISE]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/* We updated BDP this round if either we had a blocked channel, or
|
||||
* the curr_rtt_usec was not 0. */
|
||||
return (blocked_on_chan || curr_rtt_usec != 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Dispatch the sendme to the appropriate congestion control algorithm.
|
||||
*/
|
||||
int
|
||||
congestion_control_dispatch_cc_alg(congestion_control_t *cc,
|
||||
const circuit_t *circ,
|
||||
const crypt_path_t *layer_hint)
|
||||
{
|
||||
switch (cc->cc_alg) {
|
||||
case CC_ALG_WESTWOOD:
|
||||
return congestion_control_westwood_process_sendme(cc, circ, layer_hint);
|
||||
|
||||
case CC_ALG_VEGAS:
|
||||
return congestion_control_vegas_process_sendme(cc, circ, layer_hint);
|
||||
|
||||
case CC_ALG_NOLA:
|
||||
return congestion_control_nola_process_sendme(cc, circ, layer_hint);
|
||||
|
||||
case CC_ALG_SENDME:
|
||||
default:
|
||||
tor_assert(0);
|
||||
}
|
||||
|
||||
return -END_CIRC_REASON_INTERNAL;
|
||||
}
|
55
src/core/or/congestion_control_common.h
Normal file
55
src/core/or/congestion_control_common.h
Normal file
@ -0,0 +1,55 @@
|
||||
/* Copyright (c) 2019-2021, The Tor Project, Inc. */
|
||||
/* See LICENSE for licensing information */
|
||||
|
||||
/**
|
||||
* \file congestion_control_common.h
|
||||
* \brief Public APIs for congestion control
|
||||
**/
|
||||
|
||||
#ifndef TOR_CONGESTION_CONTROL_COMMON_H
|
||||
#define TOR_CONGESTION_CONTROL_COMMON_H
|
||||
|
||||
#include "core/or/crypt_path_st.h"
|
||||
#include "core/or/circuit_st.h"
|
||||
|
||||
typedef struct congestion_control_t congestion_control_t;
|
||||
|
||||
/** Wrapper for the free function, set the CC pointer to NULL after free */
|
||||
#define congestion_control_free(cc) \
|
||||
FREE_AND_NULL(congestion_control_t, congestion_control_free_, cc)
|
||||
|
||||
void congestion_control_free_(congestion_control_t *cc);
|
||||
|
||||
congestion_control_t *congestion_control_new(void);
|
||||
|
||||
int congestion_control_dispatch_cc_alg(congestion_control_t *cc,
|
||||
const circuit_t *circ,
|
||||
const crypt_path_t *layer_hint);
|
||||
|
||||
void congestion_control_note_cell_sent(congestion_control_t *cc,
|
||||
const circuit_t *circ,
|
||||
const crypt_path_t *cpath);
|
||||
|
||||
bool congestion_control_update_circuit_estimates(congestion_control_t *,
|
||||
const circuit_t *,
|
||||
const crypt_path_t *);
|
||||
|
||||
int congestion_control_get_package_window(const circuit_t *,
|
||||
const crypt_path_t *);
|
||||
|
||||
int sendme_get_inc_count(const circuit_t *, const crypt_path_t *);
|
||||
bool circuit_sent_cell_for_sendme(const circuit_t *, const crypt_path_t *);
|
||||
|
||||
/* Private section starts.
 *
 * congestion_control_common.c defines TOR_CONGESTION_CONTROL_COMMON_PRIVATE
 * before including this header, so that is the macro that gates the
 * declarations meant only for that file (and its unit tests). */
#ifdef TOR_CONGESTION_CONTROL_COMMON_PRIVATE

/*
 * Unit test declarations.
 */
#ifdef TOR_UNIT_TESTS

#endif /* defined(TOR_UNIT_TESTS) */

#endif /* defined(TOR_CONGESTION_CONTROL_COMMON_PRIVATE) */
|
||||
|
||||
#endif /* !defined(TOR_CONGESTION_CONTROL_COMMON_H) */
|
257
src/core/or/congestion_control_st.h
Normal file
257
src/core/or/congestion_control_st.h
Normal file
@ -0,0 +1,257 @@
|
||||
/* Copyright (c) 2019-2021, The Tor Project, Inc. */
|
||||
/* See LICENSE for licensing information */
|
||||
|
||||
/**
|
||||
* \file congestion_control_st.h
|
||||
* \brief Structure definitions for congestion control.
|
||||
**/
|
||||
|
||||
#ifndef CONGESTION_CONTROL_ST_H
|
||||
#define CONGESTION_CONTROL_ST_H
|
||||
|
||||
#include "core/or/crypt_path_st.h"
|
||||
#include "core/or/circuit_st.h"
|
||||
|
||||
/** Selects which sendme/congestion control algorithm a circuit uses. */
typedef enum {
  /**
   * Original Tor flow control: fixed-size circuit and stream windows.
   * Kept because the new algorithms must be able to compete with the
   * legacy behavior.
   */
  CC_ALG_SENDME = 0,

  /**
   * Prop#324 TOR_WESTWOOD - Deliberately aggressive. Westwood may not even
   * converge to fairness in some cases, because max RTT also increases
   * under congestion, which raises the Westwood RTT congestion threshold.
   * That can cause runaway queue bloat. It exists so that Vegas and NOLA
   * can be tested against something more aggressive, to ensure they do not
   * starve in the presence of cheaters, and to make sure cheaters trigger
   * the oomkiller in those cases.
   */
  CC_ALG_WESTWOOD = 1,

  /**
   * Prop#324 TOR_VEGAS - TCP Vegas-style BDP tracker. Vegas backs off
   * whenever it detects queue delay, so more aggressive algorithms can
   * beat it. In live network testing, however, it seems to do fine against
   * current SENDMEs; it outperforms Westwood and does not stall.
   */
  CC_ALG_VEGAS = 2,

  /**
   * Prop#324 TOR_NOLA - NOLA uses the BDP estimate immediately as the
   * CWND: no slow start, no other congestion signals, no delay. Like
   * TOR_VEGAS, it uses aggressive BDP estimates to avoid being
   * out-competed. It seems to get somewhat better throughput than Vegas,
   * but its aggressive BDP and rapid updates may lead to more queue
   * latency.
   */
  CC_ALG_NOLA = 3,
} cc_alg_t;

/** Total number of CC algs in the cc_alg_t enum. */
#define NUM_CC_ALGS (CC_ALG_NOLA+1)
|
||||
|
||||
/** Selects how the circuit's bandwidth-delay product (BDP) is estimated. */
typedef enum {
  /**
   * CWND-based estimate. It reacts only to changes in RTT and is relative
   * to cwnd growth, so during slow start it under-estimates the BDP.
   */
  BDP_ALG_CWND_RTT = 0,

  /**
   * Sendme-rate estimate. When in use, it measures BDP quickly, in less
   * than a cwnd worth of data, which should suit slow start. If the link
   * goes idle it reads far below the true BDP, so this estimate is reset
   * whenever the cwnd is not fully utilized.
   */
  BDP_ALG_SENDME_RATE = 1,

  /**
   * Inflight estimate. Like the cwnd estimator, but it uses packets
   * inflight minus local circuit queues in place of the current cwnd.
   * Since that is never more than the cwnd, it makes the cwnd drift
   * downward. It is only used if the local OR connection is blocked.
   */
  BDP_ALG_INFLIGHT_RTT = 2,

  /**
   * Piecewise estimate. Uses the CWND estimator until there are enough
   * SENDMEs to compute the SENDME estimator, then uses the SENDME
   * estimator, unless the local OR connection becomes blocked, in which
   * case it switches to the inflight estimator.
   */
  BDP_ALG_PIECEWISE = 3,

} bdp_alg_t;

/** Total number of BDP algs in the bdp_alg_t enum. */
#define NUM_BDP_ALGS (BDP_ALG_PIECEWISE+1)
|
||||
|
||||
/** Tunables specific to the Westwood algorithm. */
struct westwood_params_t {
  /** Multiplier, in percent, applied to cwnd when backing off on
   * congestion. */
  uint8_t cwnd_backoff_m;
  /** Multiplier, in percent, applied to max RTT when backing off on
   * congestion. */
  uint8_t rtt_backoff_m;

  /** Point between min and max RTT, in percent, that signals congestion. */
  uint8_t rtt_thresh;

  /**
   * Backoff selection: when true, back off to the minimum of the BDP and
   * the backoff multiplication; when false, to the maximum of the two.
   */
  bool min_backoff;
};
|
||||
|
||||
/** Tunables specific to the Vegas algorithm. */
struct vegas_params_t {
  /** Queue use allowed before slow start is exited. */
  uint16_t gamma;
  /** Queue use below which cwnd is incremented. */
  uint16_t alpha;
  /** Queue use above which cwnd is decremented. */
  uint16_t beta;
  /** Weight, in percent, of the average between the cwnd estimator and
   * the piecewise estimator. */
  uint8_t bdp_mix_pct;
};
|
||||
|
||||
/** Tunables specific to the NOLA algorithm (consensus params). */
struct nola_params_t {
  /** Number of cells added to the BDP estimate to obtain cwnd. */
  uint16_t bdp_overshoot;
};
|
||||
|
||||
/** Fields common to all congestion control algorithms */
|
||||
typedef struct congestion_control_t {
|
||||
/**
|
||||
* Smartlist of uint64_t monotime usec timestamps of when we sent a data
|
||||
* cell that is pending a sendme. FIFO queue that is managed similar to
|
||||
* sendme_last_digests. */
|
||||
smartlist_t *sendme_pending_timestamps;
|
||||
|
||||
/**
|
||||
* Smartlist of uint64_t monotime timestamp of when sendme's arrived.
|
||||
* FIFO queue that is managed similar to sendme_last_digests.
|
||||
* Used to estimate circuitbandwidth and BDP. */
|
||||
smartlist_t *sendme_arrival_timestamps;
|
||||
|
||||
/** RTT time data for congestion control. */
|
||||
uint64_t ewma_rtt_usec;
|
||||
uint64_t min_rtt_usec;
|
||||
uint64_t max_rtt_usec;
|
||||
|
||||
/* BDP estimates by algorithm */
|
||||
uint64_t bdp[NUM_BDP_ALGS];
|
||||
|
||||
/** Congestion window */
|
||||
uint64_t cwnd;
|
||||
|
||||
/** Number of cells in-flight (sent but awaiting SENDME ack). */
|
||||
uint64_t inflight;
|
||||
|
||||
/**
|
||||
* For steady-state: the number of sendme acks until we will acknowledge
|
||||
* a congestion event again. It starts out as the number of sendme acks
|
||||
* in a congestion windowm and is decremented each ack. When this reaches
|
||||
* 0, it means we should examine our congestion algorithm conditions.
|
||||
* In this way, we only react to one congestion event per congestion window.
|
||||
*
|
||||
* It is also reset to 0 immediately whenever the circuit's orconn is
|
||||
* blocked, and when a previously blocked orconn is unblocked.
|
||||
*/
|
||||
uint64_t next_cc_event;
|
||||
|
||||
/** Are we in slow start? */
|
||||
bool in_slow_start;
|
||||
|
||||
/** Is the local channel blocked on us? That's a congestion signal */
|
||||
bool blocked_chan;
|
||||
|
||||
/* The following parameters are cached from consensus values upon
|
||||
* circuit setup. */
|
||||
|
||||
/** Percent of cwnd to increment by during slow start */
|
||||
uint16_t cwnd_inc_pct_ss;
|
||||
|
||||
/** Number of cells to increment cwnd by during steady state */
|
||||
uint16_t cwnd_inc;
|
||||
|
||||
/** Minimum congestion window (must be at least sendme_inc) */
|
||||
uint16_t cwnd_min;
|
||||
|
||||
/**
|
||||
* Number of times per congestion window to update based on congestion
|
||||
* signals */
|
||||
uint8_t cwnd_inc_rate;
|
||||
|
||||
/**
|
||||
* Number of cwnd worth of sendme acks to smooth RTT and BDP with,
|
||||
* using N_EWMA */
|
||||
uint8_t ewma_cwnd_cnt;
|
||||
|
||||
/**
|
||||
* Minimum number of sendmes before we begin BDP estimates
|
||||
*/
|
||||
uint8_t bwe_sendme_min;
|
||||
|
||||
/**
|
||||
* Number of cells to ack with every sendme. Taken from consensus parameter
|
||||
* and negotiation during circuit setup. */
|
||||
uint8_t sendme_inc;
|
||||
|
||||
/** Which congestion control algorithm to use. Taken from
|
||||
* consensus parameter and negotiation during circuit setup. */
|
||||
cc_alg_t cc_alg;
|
||||
|
||||
/** Which algorithm to estimate circuit bandwidth with. Taken from
|
||||
* consensus parameter during circuit setup. */
|
||||
bdp_alg_t bdp_alg;
|
||||
|
||||
/** Algorithm-specific parameters. The specific struct that is used
|
||||
* depends upon the algoritghm selected by the cc_alg parameter.
|
||||
* These should not be accessed anywhere other than the algorithm-specific
|
||||
* files. */
|
||||
union {
|
||||
struct westwood_params_t westwood_params;
|
||||
struct vegas_params_t vegas_params;
|
||||
struct nola_params_t nola_params;
|
||||
};
|
||||
} congestion_control_t;
|
||||
|
||||
/**
|
||||
* Returns the number of sendme acks we will recieve before we update cwnd.
|
||||
*
|
||||
* Congestion control literature recommends only one update of cwnd per
|
||||
* cwnd worth of acks. However, we can also tune this to be more frequent
|
||||
* by increasing the 'cc_cwnd_inc_rate' consensus parameter.
|
||||
*
|
||||
* If this returns 0 due to high cwnd_inc_rate, the calling code will
|
||||
* update every sendme ack.
|
||||
*/
|
||||
static inline uint64_t CWND_UPDATE_RATE(const congestion_control_t *cc)
|
||||
{
|
||||
/* We add cwnd_inc_rate*sendme_inc/2 to round to nearest integer number
|
||||
* of acks */
|
||||
return ((cc->cwnd + cc->cwnd_inc_rate*cc->sendme_inc/2)
|
||||
/ (cc->cwnd_inc_rate*cc->sendme_inc));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the amount to increment the congestion window each update,
|
||||
* during slow start.
|
||||
*
|
||||
* Congestion control literature recommends either doubling the cwnd
|
||||
* every cwnd during slow start, or some similar exponential growth
|
||||
* (such as 50% more every cwnd, for Vegas).
|
||||
*
|
||||
* This is controlled by a consensus parameter 'cwnd_inc_pct_ss', which
|
||||
* allows us to specify the percent of the current consensus window
|
||||
* to update by.
|
||||
*/
|
||||
static inline uint64_t CWND_INC_SS(const congestion_control_t *cc)
|
||||
{
|
||||
return (cc->cwnd_inc_pct_ss*cc->cwnd/100);
|
||||
}
|
||||
|
||||
/**
 * Amount by which the congestion window is incremented (and, for Vegas,
 * also decremented) every update period.
 *
 * Expands to the cwnd_inc field of <b>cc</b>, which is controlled by the
 * cc_cwnd_inc consensus parameter.
 */
#define CWND_INC(cc) ((cc)->cwnd_inc)
|
||||
|
||||
#endif /* !defined(CONGESTION_CONTROL_ST_H) */
|
@ -35,6 +35,7 @@ LIBTOR_APP_A_SOURCES += \
|
||||
src/core/or/scheduler_kist.c \
|
||||
src/core/or/scheduler_vanilla.c \
|
||||
src/core/or/sendme.c \
|
||||
src/core/or/sendme_common.c \
|
||||
src/core/or/status.c \
|
||||
src/core/or/versions.c
|
||||
|
||||
@ -97,6 +98,7 @@ noinst_HEADERS += \
|
||||
src/core/or/relay_crypto_st.h \
|
||||
src/core/or/scheduler.h \
|
||||
src/core/or/sendme.h \
|
||||
src/core/or/sendme_common.h \
|
||||
src/core/or/server_port_cfg_st.h \
|
||||
src/core/or/socks_request_st.h \
|
||||
src/core/or/status.h \
|
||||
|
Loading…
Reference in New Issue
Block a user