diff --git a/changes/bug11553 b/changes/bug11553 new file mode 100644 index 0000000000..ed30ce9850 --- /dev/null +++ b/changes/bug11553 @@ -0,0 +1,15 @@ + o Minor features: + - When we run out of usable circuit IDs on a channel, log only one + warning for the whole channel, and include a description of + how many circuits there were on the channel. Fix for part of ticket + #11553. + + + o Major features (performance): + - Avoid wasting cycles looking for usable circuit IDs. Previously, + when allocating a new circuit ID, we would in the worst case do a + linear scan over the entire possible range of circuit IDs before + deciding that we had exhausted our possibilities. Now, we + try 64 circuit IDs at random before deciding that we probably + won't succeed. Fix for a possible root cause of ticket + #11553. diff --git a/src/or/channel.c b/src/or/channel.c index 32e87c342c..63af2f91c0 100644 --- a/src/or/channel.c +++ b/src/or/channel.c @@ -728,8 +728,8 @@ channel_init(channel_t *chan) /* Init timestamp */ chan->timestamp_last_added_nonpadding = time(NULL); - /* Init next_circ_id */ - chan->next_circ_id = crypto_rand_int(1 << 15); + /* Warn about exhausted circuit IDs no more than hourly. */ + chan->last_warned_circ_ids_exhausted.rate = 3600; /* Initialize queues. */ TOR_SIMPLEQ_INIT(&chan->incoming_queue); diff --git a/src/or/channel.h b/src/or/channel.h index 7ec222df0f..bd9a02f323 100644 --- a/src/or/channel.h +++ b/src/or/channel.h @@ -149,11 +149,6 @@ struct channel_s { circ_id_type_bitfield_t circ_id_type:2; /** DOCDOC*/ unsigned wide_circ_ids:1; - /** - * Which circ_id do we try to use next on this connection? This is - * always in the range 0..1<<15-1. - */ - circid_t next_circ_id; /** For how many circuits are we n_chan? What about p_chan? */ unsigned int num_n_circuits, num_p_circuits; @@ -182,6 +177,10 @@ struct channel_s { */ unsigned int is_local:1; + /** Have we logged a warning about circID exhaustion on this channel? + * If so, when? */ + ratelim_t last_warned_circ_ids_exhausted; + /** Channel timestamps for cell channels */ time_t timestamp_client; /* Client used this, according to relay.c */ time_t timestamp_drained; /* Output queue empty */ diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 98fef4c142..9e11a0bb1a 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -77,18 +77,28 @@ channel_connect_for_circuit(const tor_addr_t *addr, uint16_t port, return chan; } -/** Iterate over values of circ_id, starting from conn-\>next_circ_id, - * and with the high bit specified by conn-\>circ_id_type, until we get - * a circ_id that is not in use by any other circuit on that conn. + +/** Search for a value for circ_id that we can use on chan for an + * outbound circuit, until we get a circ_id that is not in use by any other + * circuit on that conn. * * Return it, or 0 if can't get a unique circ_id. */ static circid_t get_unique_circ_id_by_chan(channel_t *chan) { +/* This number is chosen somewhat arbitrarily; see comment below for more + * info. When the space is 80% full, it gives a one-in-a-million failure + * chance; when the space is 90% full, it gives a one-in-850 chance; and when + * the space is 95% full, it gives a one-in-26 failure chance. That seems + * okay, though you could make a case IMO for anything between N=32 and + * N=256. */ +#define MAX_CIRCID_ATTEMPTS 64 + int in_use; + unsigned n_with_circ = 0, n_pending_destroy = 0; circid_t test_circ_id; circid_t attempts=0; - circid_t high_bit, max_range; + circid_t high_bit, max_range, mask; tor_assert(chan); @@ -98,25 +108,52 @@ get_unique_circ_id_by_chan(channel_t *chan) "a client with no identity."); return 0; } - max_range = (chan->wide_circ_ids) ? (1u<<31) : (1u<<15); + max_range = (chan->wide_circ_ids) ? (1u<<31) : (1u<<15); + mask = max_range - 1; high_bit = (chan->circ_id_type == CIRC_ID_TYPE_HIGHER) ? max_range : 0; do { - /* Sequentially iterate over test_circ_id=1...max_range until we find a - * circID such that (high_bit|test_circ_id) is not already used. */ - test_circ_id = chan->next_circ_id++; - if (test_circ_id == 0 || test_circ_id >= max_range) { - test_circ_id = 1; - chan->next_circ_id = 2; - } - if (++attempts > max_range) { - /* Make sure we don't loop forever if all circ_id's are used. This - * matters because it's an external DoS opportunity. + if (++attempts > MAX_CIRCID_ATTEMPTS) { + /* Make sure we don't loop forever because all circuit IDs are used. + * + * Once, we would try until we had tried every possible circuit ID. But + * that's quite expensive. Instead, we try MAX_CIRCID_ATTEMPTS random + * circuit IDs, and then give up. + * + * This potentially causes us to give up early if our circuit ID space + * is nearly full. If we have N circuit IDs in use, then we will reject + * a new circuit with probability (N / max_range) ^ MAX_CIRCID_ATTEMPTS. + * This means that in practice, a few percent of our circuit ID capacity + * will go unused. + * + * The alternative here, though, is to do a linear search over the + * whole circuit ID space every time we extend a circuit, which is + * not so great either. */ - log_warn(LD_CIRC,"No unused circ IDs. Failing."); + log_fn_ratelim(&chan->last_warned_circ_ids_exhausted, LOG_WARN, + LD_CIRC,"No unused circIDs found on channel %s wide " + "circID support, with %u inbound and %u outbound circuits. " + "Found %u circuit IDs in use by circuits, and %u with " + "pending destroy cells." + "Failing a circuit.", + chan->wide_circ_ids ? "with" : "without", + chan->num_p_circuits, chan->num_n_circuits, + n_with_circ, n_pending_destroy); return 0; } + + do { + crypto_rand((char*) &test_circ_id, sizeof(test_circ_id)); + test_circ_id &= mask; + } while (test_circ_id == 0); + test_circ_id |= high_bit; - } while (circuit_id_in_use_on_channel(test_circ_id, chan)); + + in_use = circuit_id_in_use_on_channel(test_circ_id, chan); + if (in_use == 1) + ++n_with_circ; + else if (in_use == 2) + ++n_pending_destroy; + } while (in_use); return test_circ_id; } @@ -561,7 +598,9 @@ circuit_deliver_create_cell(circuit_t *circ, const create_cell_t *create_cell, id = get_unique_circ_id_by_chan(circ->n_chan); if (!id) { - log_warn(LD_CIRC,"failed to get unique circID."); + static ratelim_t circid_warning_limit = RATELIM_INIT(9600); + log_fn_ratelim(&circid_warning_limit, LOG_WARN, LD_CIRC, + "failed to get unique circID."); return -1; } log_debug(LD_CIRC,"Chosen circID %u.", (unsigned)id); diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c index b71dc3c13a..8a8fc8b4ed 100644 --- a/src/or/circuitlist.c +++ b/src/or/circuitlist.c @@ -1065,13 +1065,21 @@ circuit_get_by_circid_channel_even_if_marked(circid_t circ_id, } /** Return true iff the circuit ID circ_id is currently used by a - * circuit, marked or not, on chan. */ + * circuit, marked or not, on chan, or if the circ ID is reserved until + * a queued destroy cell can be sent. + * + * (Return 1 if the circuit is present, marked or not; Return 2 + * if the circuit ID is pending a destroy.) + **/ int circuit_id_in_use_on_channel(circid_t circ_id, channel_t *chan) { int found = 0; - return circuit_get_by_circid_channel_impl(circ_id, chan, &found) != NULL - || found; + if (circuit_get_by_circid_channel_impl(circ_id, chan, &found) != NULL) + return 1; + if (found) + return 2; + return 0; } /** Return the circuit that a given edge connection is using. */ diff --git a/src/or/connection.c b/src/or/connection.c index 2e72e6b397..9614fa52da 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -271,8 +271,6 @@ dir_connection_new(int socket_family) * * Set timestamp_last_added_nonpadding to now. * - * Assign a pseudorandom next_circ_id between 0 and 2**15. - * * Initialize active_circuit_pqueue. * * Set active_circuit_pqueue_last_recalibrated to current cell_ewma tick.