mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-10 21:23:58 +01:00
Merge branch 'bug12184_diagnostic_squashed'
This commit is contained in:
commit
a58d94fb7c
4
changes/bug12184_diagnostic
Normal file
4
changes/bug12184_diagnostic
Normal file
@ -0,0 +1,4 @@
|
||||
o Minor features (diagnostic):
|
||||
- Add significant new logging code to attempt to diagnose bug 12184,
|
||||
where relays seem to run out of available circuit IDs.
|
||||
|
@ -96,10 +96,12 @@ get_unique_circ_id_by_chan(channel_t *chan)
|
||||
* N=256. */
|
||||
#define MAX_CIRCID_ATTEMPTS 64
|
||||
int in_use;
|
||||
unsigned n_with_circ = 0, n_pending_destroy = 0;
|
||||
unsigned n_with_circ = 0, n_pending_destroy = 0, n_weird_pending_destroy = 0;
|
||||
circid_t test_circ_id;
|
||||
circid_t attempts=0;
|
||||
circid_t high_bit, max_range, mask;
|
||||
int64_t pending_destroy_time_total = 0;
|
||||
int64_t pending_destroy_time_max = 0;
|
||||
|
||||
tor_assert(chan);
|
||||
|
||||
@ -130,15 +132,57 @@ get_unique_circ_id_by_chan(channel_t *chan)
|
||||
* whole circuit ID space every time we extend a circuit, which is
|
||||
* not so great either.
|
||||
*/
|
||||
log_fn_ratelim(&chan->last_warned_circ_ids_exhausted, LOG_WARN,
|
||||
LD_CIRC,"No unused circIDs found on channel %s wide "
|
||||
int64_t queued_destroys;
|
||||
char *m = rate_limit_log(&chan->last_warned_circ_ids_exhausted,
|
||||
approx_time());
|
||||
if (m == NULL)
|
||||
return 0; /* This message has been rate-limited away. */
|
||||
if (n_pending_destroy)
|
||||
pending_destroy_time_total /= n_pending_destroy;
|
||||
log_warn(LD_CIRC,"No unused circIDs found on channel %s wide "
|
||||
"circID support, with %u inbound and %u outbound circuits. "
|
||||
"Found %u circuit IDs in use by circuits, and %u with "
|
||||
"pending destroy cells."
|
||||
"Failing a circuit.",
|
||||
"pending destroy cells. (%u of those were marked bogusly.) "
|
||||
"The ones with pending destroy cells "
|
||||
"have been marked unusable for an average of %ld seconds "
|
||||
"and a maximum of %ld seconds. This channel is %ld seconds "
|
||||
"old. Failing a circuit.%s",
|
||||
chan->wide_circ_ids ? "with" : "without",
|
||||
chan->num_p_circuits, chan->num_n_circuits,
|
||||
n_with_circ, n_pending_destroy);
|
||||
n_with_circ, n_pending_destroy, n_weird_pending_destroy,
|
||||
(long)pending_destroy_time_total,
|
||||
(long)pending_destroy_time_max,
|
||||
(long)(approx_time() - chan->timestamp_created),
|
||||
m);
|
||||
tor_free(m);
|
||||
|
||||
if (!chan->cmux) {
|
||||
/* This warning should be impossible. */
|
||||
log_warn(LD_BUG, " This channel somehow has no cmux on it!");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* analysis so far on 12184 suggests that we're running out of circuit
|
||||
IDs because it looks like we have too many pending destroy
|
||||
cells. Let's see how many we really have pending.
|
||||
*/
|
||||
queued_destroys = circuitmux_count_queued_destroy_cells(chan,
|
||||
chan->cmux);
|
||||
|
||||
log_warn(LD_CIRC, " Circuitmux on this channel has %u circuits, "
|
||||
"of which %u are active. It says it has "I64_FORMAT
|
||||
" destroy cells queued.",
|
||||
circuitmux_num_circuits(chan->cmux),
|
||||
circuitmux_num_active_circuits(chan->cmux),
|
||||
I64_PRINTF_ARG(queued_destroys));
|
||||
|
||||
/* Change this into "if (1)" in order to get more information about
|
||||
* possible failure modes here. You'll need to know how to use gdb with
|
||||
* Tor: this will make Tor exit with an assertion failure if the cmux is
|
||||
* corrupt. */
|
||||
if (0)
|
||||
circuitmux_assert_okay(chan->cmux);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -152,8 +196,20 @@ get_unique_circ_id_by_chan(channel_t *chan)
|
||||
in_use = circuit_id_in_use_on_channel(test_circ_id, chan);
|
||||
if (in_use == 1)
|
||||
++n_with_circ;
|
||||
else if (in_use == 2)
|
||||
else if (in_use == 2) {
|
||||
time_t since_when;
|
||||
++n_pending_destroy;
|
||||
since_when =
|
||||
circuit_id_when_marked_unusable_on_channel(test_circ_id, chan);
|
||||
if (since_when) {
|
||||
time_t waiting = approx_time() - since_when;
|
||||
pending_destroy_time_total += waiting;
|
||||
if (waiting > pending_destroy_time_max)
|
||||
pending_destroy_time_max = waiting;
|
||||
} else {
|
||||
++n_weird_pending_destroy;
|
||||
}
|
||||
}
|
||||
} while (in_use);
|
||||
return test_circ_id;
|
||||
}
|
||||
|
@ -59,6 +59,8 @@ typedef struct chan_circid_circuit_map_t {
|
||||
channel_t *chan;
|
||||
circid_t circ_id;
|
||||
circuit_t *circuit;
|
||||
/* For debugging 12184: when was this placeholder item added? */
|
||||
time_t made_placeholder_at;
|
||||
} chan_circid_circuit_map_t;
|
||||
|
||||
/** Helper for hash tables: compare the channel and circuit ID for a and
|
||||
@ -184,6 +186,7 @@ circuit_set_circid_chan_helper(circuit_t *circ, int direction,
|
||||
found = HT_FIND(chan_circid_map, &chan_circid_map, &search);
|
||||
if (found) {
|
||||
found->circuit = circ;
|
||||
found->made_placeholder_at = 0;
|
||||
} else {
|
||||
found = tor_malloc_zero(sizeof(chan_circid_circuit_map_t));
|
||||
found->circ_id = id;
|
||||
@ -241,11 +244,14 @@ channel_mark_circid_unusable(channel_t *chan, circid_t id)
|
||||
"a circuit there.", (unsigned)id, chan);
|
||||
} else if (ent) {
|
||||
/* It's already marked. */
|
||||
if (!ent->made_placeholder_at)
|
||||
ent->made_placeholder_at = approx_time();
|
||||
} else {
|
||||
ent = tor_malloc_zero(sizeof(chan_circid_circuit_map_t));
|
||||
ent->chan = chan;
|
||||
ent->circ_id = id;
|
||||
/* leave circuit at NULL */
|
||||
/* leave circuit at NULL. */
|
||||
ent->made_placeholder_at = approx_time();
|
||||
HT_INSERT(chan_circid_map, &chan_circid_map, ent);
|
||||
}
|
||||
}
|
||||
@ -1096,6 +1102,27 @@ circuit_id_in_use_on_channel(circid_t circ_id, channel_t *chan)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** Helper for debugging 12184. Returns the time since which 'circ_id' has
|
||||
* been marked unusable on 'chan'. */
|
||||
time_t
|
||||
circuit_id_when_marked_unusable_on_channel(circid_t circ_id, channel_t *chan)
|
||||
{
|
||||
chan_circid_circuit_map_t search;
|
||||
chan_circid_circuit_map_t *found;
|
||||
|
||||
memset(&search, 0, sizeof(search));
|
||||
search.circ_id = circ_id;
|
||||
search.chan = chan;
|
||||
|
||||
found = HT_FIND(chan_circid_map, &chan_circid_map, &search);
|
||||
|
||||
if (! found || found->circuit)
|
||||
return 0;
|
||||
|
||||
return found->made_placeholder_at;
|
||||
}
|
||||
|
||||
|
||||
/** Return the circuit that a given edge connection is using. */
|
||||
circuit_t *
|
||||
circuit_get_by_edge_conn(edge_connection_t *conn)
|
||||
|
@ -28,6 +28,8 @@ void circuit_set_n_circid_chan(circuit_t *circ, circid_t id,
|
||||
channel_t *chan);
|
||||
void channel_mark_circid_unusable(channel_t *chan, circid_t id);
|
||||
void channel_mark_circid_usable(channel_t *chan, circid_t id);
|
||||
time_t circuit_id_when_marked_unusable_on_channel(circid_t circ_id,
|
||||
channel_t *chan);
|
||||
void circuit_set_state(circuit_t *circ, uint8_t state);
|
||||
void circuit_close_all_marked(void);
|
||||
int32_t circuit_initial_package_window(void);
|
||||
|
@ -1909,3 +1909,38 @@ circuitmux_append_destroy_cell(channel_t *chan,
|
||||
}
|
||||
}
|
||||
|
||||
/*DOCDOC; for debugging 12184. This runs slowly. */
|
||||
int64_t
|
||||
circuitmux_count_queued_destroy_cells(const channel_t *chan,
|
||||
const circuitmux_t *cmux)
|
||||
{
|
||||
int64_t n_destroy_cells = cmux->destroy_ctr;
|
||||
int64_t destroy_queue_size = cmux->destroy_cell_queue.n;
|
||||
|
||||
int64_t manual_total = 0;
|
||||
int64_t manual_total_in_map = 0;
|
||||
packed_cell_t *cell;
|
||||
|
||||
TOR_SIMPLEQ_FOREACH(cell, &cmux->destroy_cell_queue.head, next) {
|
||||
circid_t id;
|
||||
++manual_total;
|
||||
|
||||
id = packed_cell_get_circid(cell, chan->wide_circ_ids);
|
||||
if (circuit_id_in_use_on_channel(id, (channel_t*)chan))
|
||||
++manual_total_in_map;
|
||||
}
|
||||
|
||||
if (n_destroy_cells != destroy_queue_size ||
|
||||
n_destroy_cells != manual_total ||
|
||||
n_destroy_cells != manual_total_in_map) {
|
||||
log_warn(LD_BUG, " Discrepancy in counts for queued destroy cells on "
|
||||
"circuitmux. n="I64_FORMAT". queue_size="I64_FORMAT". "
|
||||
"manual_total="I64_FORMAT". manual_total_in_map="I64_FORMAT".",
|
||||
I64_PRINTF_ARG(n_destroy_cells),
|
||||
I64_PRINTF_ARG(destroy_queue_size),
|
||||
I64_PRINTF_ARG(manual_total),
|
||||
I64_PRINTF_ARG(manual_total_in_map));
|
||||
}
|
||||
|
||||
return n_destroy_cells;
|
||||
}
|
||||
|
@ -121,6 +121,10 @@ unsigned int circuitmux_num_cells(circuitmux_t *cmux);
|
||||
unsigned int circuitmux_num_circuits(circuitmux_t *cmux);
|
||||
unsigned int circuitmux_num_active_circuits(circuitmux_t *cmux);
|
||||
|
||||
/* Debuging interface - slow. */
|
||||
int64_t circuitmux_count_queued_destroy_cells(const channel_t *chan,
|
||||
const circuitmux_t *cmux);
|
||||
|
||||
/* Channel interface */
|
||||
circuit_t * circuitmux_get_first_active_circuit(circuitmux_t *cmux,
|
||||
cell_queue_t **destroy_queue_out);
|
||||
|
@ -2569,6 +2569,17 @@ packed_cell_get_command(const packed_cell_t *cell, int wide_circ_ids)
|
||||
}
|
||||
}
|
||||
|
||||
/** Extract the circuit ID from a packed cell. */
|
||||
circid_t
|
||||
packed_cell_get_circid(const packed_cell_t *cell, int wide_circ_ids)
|
||||
{
|
||||
if (wide_circ_ids) {
|
||||
return ntohl(get_uint32(cell->body));
|
||||
} else {
|
||||
return ntohs(get_uint16(cell->body));
|
||||
}
|
||||
}
|
||||
|
||||
/** Pull as many cells as possible (but no more than <b>max</b>) from the
|
||||
* queue of the first active circuit on <b>chan</b>, and write them to
|
||||
* <b>chan</b>->outbuf. Return the number of cells written. Advance
|
||||
|
@ -82,6 +82,8 @@ void stream_choice_seed_weak_rng(void);
|
||||
int relay_crypt(circuit_t *circ, cell_t *cell, cell_direction_t cell_direction,
|
||||
crypt_path_t **layer_hint, char *recognized);
|
||||
|
||||
circid_t packed_cell_get_circid(const packed_cell_t *cell, int wide_circ_ids);
|
||||
|
||||
#ifdef RELAY_PRIVATE
|
||||
STATIC int connected_cell_parse(const relay_header_t *rh, const cell_t *cell,
|
||||
tor_addr_t *addr_out, int *ttl_out);
|
||||
|
Loading…
Reference in New Issue
Block a user