Bug 3443: Don't count ORconn setup in circuit build time.

Also, add a hack Roger suggested where we're more patient if no circuits are
opened yet.
This commit is contained in:
Mike Perry 2012-10-25 17:43:10 -07:00 committed by Nick Mathewson
parent bd93ff8dd7
commit 42e3c04a7a
9 changed files with 157 additions and 33 deletions

11
changes/bug3443 Normal file
View File

@ -0,0 +1,11 @@
o Minor bugfixes
- Alter circuit build timeout measurement to start at the point
where we begin the CREATE/CREATE_FAST step (as opposed to circuit
initialization). This should make our timeout measurements more
uniform. Previously, we were sometimes including ORconn setup time
in our circuit build time measurements. Fixes bug #3443.
o Minor features
- If we have no circuits open, use a relaxed timeout (the 95-percentile
cutoff) until a circuit succeeds. This should allow Tor to succeed
building circuits if the network connection drastically changes.

View File

@ -522,6 +522,18 @@ circuit_deliver_create_cell(circuit_t *circ, uint8_t cell_type,
CELL_DIRECTION_OUT, 0);
if (CIRCUIT_IS_ORIGIN(circ)) {
/* Update began timestamp for circuits starting their first hop */
if (TO_ORIGIN_CIRCUIT(circ)->cpath->state == CPATH_STATE_CLOSED) {
if (circ->n_chan->state != CHANNEL_STATE_OPEN) {
log_warn(LD_CIRC,
"Got first hop for a circuit without an opened channel. "
"State: %s.", channel_state_to_string(circ->n_chan->state));
tor_fragile_assert();
}
tor_gettimeofday(&circ->timestamp_began);
}
/* mark it so it gets better rate limiting treatment. */
channel_timestamp_client(circ->n_chan);
}
@ -672,7 +684,7 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
struct timeval end;
long timediff;
tor_gettimeofday(&end);
timediff = tv_mdiff(&circ->base_.timestamp_created, &end);
timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
/*
* If the circuit build time is much greater than we would have cut

View File

@ -555,6 +555,11 @@ init_circuit_base(circuit_t *circ)
{
tor_gettimeofday(&circ->timestamp_created);
// Gets reset when we send CREATE_FAST.
// circuit_expire_building() expects these to be equal
// until the orconn is built.
circ->timestamp_began = circ->timestamp_created;
circ->package_window = circuit_initial_package_window();
circ->deliver_window = CIRCWINDOW_START;
@ -777,7 +782,7 @@ circuit_dump_conn_details(int severity,
"state %d (%s), born %ld:",
conn_array_index, type, this_circid, other_circid, circ->state,
circuit_state_to_string(circ->state),
(long)circ->timestamp_created.tv_sec);
(long)circ->timestamp_began.tv_sec);
if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */
circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ));
}
@ -840,7 +845,7 @@ circuit_dump_chan_details(int severity,
"state %d (%s), born %ld:",
chan, type, this_circid, other_circid, circ->state,
circuit_state_to_string(circ->state),
(long)circ->timestamp_created.tv_sec);
(long)circ->timestamp_began.tv_sec);
if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */
circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ));
}

View File

@ -1469,11 +1469,6 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
max_time = circuit_build_times_max(cbt);
/* Sometimes really fast guard nodes give us such a steep curve
* that this ends up being not that much greater than timeout_ms.
* Make it be at least 1 min to handle this case. */
cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
if (cbt->timeout_ms > max_time) {
log_info(LD_CIRC,
"Circuit build timeout of %dms is beyond the maximum build "
@ -1490,6 +1485,11 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
cbt->close_ms = 2*max_time;
}
/* Sometimes really fast guard nodes give us such a steep curve
* that this ends up being not that much greater than timeout_ms.
* Make it be at least 1 min to handle this case. */
cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
cbt->have_computed_timeout = 1;
return 1;
}

View File

@ -176,6 +176,13 @@ circuit_is_better(const origin_circuit_t *oa, const origin_circuit_t *ob,
const uint8_t purpose = ENTRY_TO_CONN(conn)->purpose;
int a_bits, b_bits;
/* If one of the circuits was allowed to live due to relaxing its timeout,
* it is definitely worse (it's probably a much slower path). */
if (oa->relaxed_timeout && !ob->relaxed_timeout)
return 0; /* ob is better. It's not relaxed. */
if (!oa->relaxed_timeout && ob->relaxed_timeout)
return 1; /* oa is better. It's not relaxed. */
switch (purpose) {
case CIRCUIT_PURPOSE_C_GENERAL:
/* if it's used but less dirty it's best;
@ -187,7 +194,7 @@ circuit_is_better(const origin_circuit_t *oa, const origin_circuit_t *ob,
return 1;
} else {
if (a->timestamp_dirty ||
timercmp(&a->timestamp_created, &b->timestamp_created, >))
timercmp(&a->timestamp_began, &b->timestamp_began, >))
return 1;
if (ob->build_state->is_internal)
/* XXX023 what the heck is this internal thing doing here. I
@ -279,7 +286,7 @@ circuit_get_best(const entry_connection_t *conn,
if (purpose == CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT &&
!must_be_open && circ->state != CIRCUIT_STATE_OPEN &&
tv_mdiff(&now, &circ->timestamp_created) > circ_times.timeout_ms) {
tv_mdiff(&now, &circ->timestamp_began) > circ_times.timeout_ms) {
intro_going_on_but_too_old = 1;
continue;
}
@ -365,8 +372,31 @@ circuit_expire_building(void)
const or_options_t *options = get_options();
struct timeval now;
cpath_build_state_t *build_state;
int any_opened_circs = 0;
tor_gettimeofday(&now);
/* Check to see if we have any opened circuits. If we don't,
* we want to be more lenient with timeouts, in case the
* user has relocated and/or changed network connections.
* See bug #3443. */
while (next_circ) {
if (!CIRCUIT_IS_ORIGIN(next_circ) || /* didn't originate here */
next_circ->marked_for_close) /* don't mess with marked circs */
continue;
if (TO_ORIGIN_CIRCUIT(next_circ)->has_opened &&
next_circ->state == CIRCUIT_STATE_OPEN &&
TO_ORIGIN_CIRCUIT(next_circ)->build_state &&
TO_ORIGIN_CIRCUIT(next_circ)->build_state->desired_path_len
== DEFAULT_ROUTE_LEN) {
any_opened_circs = 1;
break;
}
next_circ = next_circ->next;
}
next_circ = global_circuitlist;
#define SET_CUTOFF(target, msec) do { \
long ms = tor_lround(msec); \
struct timeval diff; \
@ -391,9 +421,21 @@ circuit_expire_building(void)
victim = next_circ;
next_circ = next_circ->next;
if (!CIRCUIT_IS_ORIGIN(victim) || /* didn't originate here */
victim->marked_for_close) /* don't mess with marked circs */
victim->marked_for_close) /* don't mess with marked circs */
continue;
/* If we haven't yet started the first hop, it means we don't have
* any orconns available, and thus have not started counting time yet
* for this circuit. See circuit_deliver_create_cell() and uses of
* timestamp_began.
*
* Continue to wait in this case. The ORConn should timeout
* independently and kill us then.
*/
if (TO_ORIGIN_CIRCUIT(victim)->cpath->state == CPATH_STATE_CLOSED) {
continue;
}
build_state = TO_ORIGIN_CIRCUIT(victim)->build_state;
if (build_state && build_state->onehop_tunnel)
cutoff = begindir_cutoff;
@ -410,9 +452,40 @@ circuit_expire_building(void)
if (TO_ORIGIN_CIRCUIT(victim)->hs_circ_has_timed_out)
cutoff = hs_extremely_old_cutoff;
if (timercmp(&victim->timestamp_created, &cutoff, >))
if (timercmp(&victim->timestamp_began, &cutoff, >))
continue; /* it's still young, leave it alone */
if (!any_opened_circs) {
/* It's still young enough that we wouldn't close it, right? */
if (timercmp(&victim->timestamp_began, &close_cutoff, >)) {
if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) {
int first_hop_succeeded = TO_ORIGIN_CIRCUIT(victim)->cpath->state
== CPATH_STATE_OPEN;
log_info(LD_CIRC,
"No circuits are opened. Relaxing timeout for "
"a circuit with channel state %s. %d guards are live.",
channel_state_to_string(victim->n_chan->state),
num_live_entry_guards());
/* We count the timeout here for CBT, because technically this
* was a timeout, and the timeout value needs to reset if we
* see enough of them. Note this means we also need to avoid
* double-counting below, too. */
circuit_build_times_count_timeout(&circ_times, first_hop_succeeded);
TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout = 1;
}
continue;
} else {
log_notice(LD_CIRC,
"No circuits are opened. Relaxed timeout for "
"a circuit with channel state %s to %ldms. "
"However, it appears the circuit has timed out anyway. "
"%d guards are live. ",
channel_state_to_string(victim->n_chan->state),
(long)circ_times.close_ms, num_live_entry_guards());
}
}
#if 0
/* some debug logs, to help track bugs */
if (victim->purpose >= CIRCUIT_PURPOSE_C_INTRODUCING &&
@ -489,9 +562,12 @@ circuit_expire_building(void)
/* Record this failure to check for too many timeouts
* in a row. This function does not record a time value yet
* (we do that later); it only counts the fact that we did
* have a timeout. */
circuit_build_times_count_timeout(&circ_times,
first_hop_succeeded);
* have a timeout. We also want to avoid double-counting
* already "relaxed" circuits, which are counted above. */
if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) {
circuit_build_times_count_timeout(&circ_times,
first_hop_succeeded);
}
continue;
}
@ -500,16 +576,16 @@ circuit_expire_building(void)
* it off at, we probably had a suspend event along this codepath,
* and we should discard the value.
*/
if (timercmp(&victim->timestamp_created, &extremely_old_cutoff, <)) {
if (timercmp(&victim->timestamp_began, &extremely_old_cutoff, <)) {
log_notice(LD_CIRC,
"Extremely large value for circuit build timeout: %lds. "
"Assuming clock jump. Purpose %d (%s)",
(long)(now.tv_sec - victim->timestamp_created.tv_sec),
(long)(now.tv_sec - victim->timestamp_began.tv_sec),
victim->purpose,
circuit_purpose_to_string(victim->purpose));
} else if (circuit_build_times_count_close(&circ_times,
first_hop_succeeded,
victim->timestamp_created.tv_sec)) {
victim->timestamp_began.tv_sec)) {
circuit_build_times_set_timeout(&circ_times);
}
}
@ -791,7 +867,7 @@ circuit_build_needed_circs(time_t now)
circ = circuit_get_youngest_clean_open(CIRCUIT_PURPOSE_C_GENERAL);
if (get_options()->RunTesting &&
circ &&
circ->timestamp_created.tv_sec + TESTING_CIRCUIT_INTERVAL < now) {
circ->timestamp_began.tv_sec + TESTING_CIRCUIT_INTERVAL < now) {
log_fn(LOG_INFO,"Creating a new testing circuit.");
circuit_launch(CIRCUIT_PURPOSE_C_GENERAL, 0);
}
@ -911,7 +987,7 @@ circuit_expire_old_circuits_clientside(void)
circ->purpose);
circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED);
} else if (!circ->timestamp_dirty && circ->state == CIRCUIT_STATE_OPEN) {
if (timercmp(&circ->timestamp_created, &cutoff, <)) {
if (timercmp(&circ->timestamp_began, &cutoff, <)) {
if (circ->purpose == CIRCUIT_PURPOSE_C_GENERAL ||
circ->purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT ||
circ->purpose == CIRCUIT_PURPOSE_S_ESTABLISH_INTRO ||
@ -921,7 +997,7 @@ circuit_expire_old_circuits_clientside(void)
circ->purpose == CIRCUIT_PURPOSE_S_CONNECT_REND) {
log_debug(LD_CIRC,
"Closing circuit that has been unused for %ld msec.",
tv_mdiff(&circ->timestamp_created, &now));
tv_mdiff(&circ->timestamp_began, &now));
circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED);
} else if (!TO_ORIGIN_CIRCUIT(circ)->is_ancient) {
/* Server-side rend joined circuits can end up really old, because
@ -935,7 +1011,7 @@ circuit_expire_old_circuits_clientside(void)
"Ancient non-dirty circuit %d is still around after "
"%ld milliseconds. Purpose: %d (%s)",
TO_ORIGIN_CIRCUIT(circ)->global_identifier,
tv_mdiff(&circ->timestamp_created, &now),
tv_mdiff(&circ->timestamp_began, &now),
circ->purpose,
circuit_purpose_to_string(circ->purpose));
TO_ORIGIN_CIRCUIT(circ)->is_ancient = 1;
@ -1316,20 +1392,24 @@ circuit_launch_by_extend_info(uint8_t purpose,
circ = circuit_find_to_cannibalize(purpose, extend_info, flags);
if (circ) {
uint8_t old_purpose = circ->base_.purpose;
struct timeval old_timestamp_created;
struct timeval old_timestamp_began;
log_info(LD_CIRC,"Cannibalizing circ '%s' for purpose %d (%s)",
build_state_get_exit_nickname(circ->build_state), purpose,
circuit_purpose_to_string(purpose));
circuit_change_purpose(TO_CIRCUIT(circ), purpose);
/* reset the birth date of this circ, else expire_building
/* Reset the start date of this circ, else expire_building
* will see it and think it's been trying to build since it
* began. */
tor_gettimeofday(&circ->base_.timestamp_created);
* began.
*
* Technically, the code should reset this when the
* create cell is finally sent, but we're close enough
* here. */
tor_gettimeofday(&circ->base_.timestamp_began);
control_event_circuit_cannibalized(circ, old_purpose,
&old_timestamp_created);
&old_timestamp_began);
switch (purpose) {
case CIRCUIT_PURPOSE_C_ESTABLISH_REND:

View File

@ -3578,9 +3578,9 @@ control_event_circuit_status_minor(origin_circuit_t *circ,
/* event_tail can currently be up to 130 chars long */
const char *hs_state_str =
circuit_purpose_to_controller_hs_state_string(purpose);
const struct timeval *old_timestamp_created = tv;
const struct timeval *old_timestamp_began = tv;
char tbuf[ISO_TIME_USEC_LEN+1];
format_iso_time_nospace_usec(tbuf, old_timestamp_created);
format_iso_time_nospace_usec(tbuf, old_timestamp_began);
tor_snprintf(event_tail, sizeof(event_tail),
" OLD_PURPOSE=%s%s%s OLD_TIME_CREATED=%s",

View File

@ -219,7 +219,7 @@ entry_is_live(entry_guard_t *e, int need_uptime, int need_capacity,
}
/** Return the number of entry guards that we think are usable. */
static int
int
num_live_entry_guards(void)
{
int n = 0;

View File

@ -52,6 +52,7 @@ typedef struct entry_guard_t {
entry_guard_t *entry_guard_get_by_id_digest(const char *digest);
void entry_guards_changed(void);
const smartlist_t *get_entry_guards(void);
int num_live_entry_guards(void);
#endif

View File

@ -2702,10 +2702,21 @@ typedef struct circuit_t {
* length ONIONSKIN_CHALLENGE_LEN. */
char *n_chan_onionskin;
/** When was this circuit created? We keep this timestamp with a higher
* resolution than most so that the circuit-build-time tracking code can
* get millisecond resolution. */
/** When did circuit construction actually begin (ie send the
* CREATE cell or begin cannibalization).
*
* Note: This timer will get reset if we decide to cannibalize
* a circuit. It may also get reset during certain phases of hidden
* service circuit use.
*
* We keep this timestamp with a higher resolution than most so that the
* circuit-build-time tracking code can get millisecond resolution.
*/
struct timeval timestamp_began;
/** This timestamp marks when the init_circuit_base constructor ran. */
struct timeval timestamp_created;
/** When the circuit was first used, or 0 if the circuit is clean.
*
* XXXX023 Note that some code will artifically adjust this value backward
@ -2815,6 +2826,10 @@ typedef struct origin_circuit_t {
* service-side introduction circuits never have this flag set.) */
unsigned int hs_circ_has_timed_out : 1;
/** Set iff this circuit has been given a relaxed timeout because
* no circuits have opened. Used to prevent spamming logs. */
unsigned int relaxed_timeout : 1;
/** Set iff this is a service-side rendezvous circuit for which a
* new connection attempt has been launched. We consider launching
* a new service-side rend circ to a client when the previous one