Bug 3443: Don't count ORconn setup in circuit build time.

Also, add a hack Roger suggested where we're more patient if no circuits are
opened yet.
This commit is contained in:
Mike Perry 2012-10-25 17:43:10 -07:00 committed by Nick Mathewson
parent bd93ff8dd7
commit 42e3c04a7a
9 changed files with 157 additions and 33 deletions

11
changes/bug3443 Normal file
View File

@ -0,0 +1,11 @@
o Minor bugfixes
- Alter circuit build timeout measurement to start at the point
where we begin the CREATE/CREATE_FAST step (as opposed to circuit
initialization). This should make our timeout measurements more
uniform. Previously, we were sometimes including ORconn setup time
in our circuit build time measurements. Fixes bug #3443.
o Minor features
- If we have no circuits open, use a relaxed timeout (the 95-percentile
cutoff) until a circuit succeeds. This should allow Tor to succeed
building circuits if the network connection drastically changes.

View File

@ -522,6 +522,18 @@ circuit_deliver_create_cell(circuit_t *circ, uint8_t cell_type,
CELL_DIRECTION_OUT, 0); CELL_DIRECTION_OUT, 0);
if (CIRCUIT_IS_ORIGIN(circ)) { if (CIRCUIT_IS_ORIGIN(circ)) {
/* Update began timestamp for circuits starting their first hop */
if (TO_ORIGIN_CIRCUIT(circ)->cpath->state == CPATH_STATE_CLOSED) {
if (circ->n_chan->state != CHANNEL_STATE_OPEN) {
log_warn(LD_CIRC,
"Got first hop for a circuit without an opened channel. "
"State: %s.", channel_state_to_string(circ->n_chan->state));
tor_fragile_assert();
}
tor_gettimeofday(&circ->timestamp_began);
}
/* mark it so it gets better rate limiting treatment. */ /* mark it so it gets better rate limiting treatment. */
channel_timestamp_client(circ->n_chan); channel_timestamp_client(circ->n_chan);
} }
@ -672,7 +684,7 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
struct timeval end; struct timeval end;
long timediff; long timediff;
tor_gettimeofday(&end); tor_gettimeofday(&end);
timediff = tv_mdiff(&circ->base_.timestamp_created, &end); timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
/* /*
* If the circuit build time is much greater than we would have cut * If the circuit build time is much greater than we would have cut

View File

@ -555,6 +555,11 @@ init_circuit_base(circuit_t *circ)
{ {
tor_gettimeofday(&circ->timestamp_created); tor_gettimeofday(&circ->timestamp_created);
// Gets reset when we send CREATE_FAST.
// circuit_expire_building() expects these to be equal
// until the orconn is built.
circ->timestamp_began = circ->timestamp_created;
circ->package_window = circuit_initial_package_window(); circ->package_window = circuit_initial_package_window();
circ->deliver_window = CIRCWINDOW_START; circ->deliver_window = CIRCWINDOW_START;
@ -777,7 +782,7 @@ circuit_dump_conn_details(int severity,
"state %d (%s), born %ld:", "state %d (%s), born %ld:",
conn_array_index, type, this_circid, other_circid, circ->state, conn_array_index, type, this_circid, other_circid, circ->state,
circuit_state_to_string(circ->state), circuit_state_to_string(circ->state),
(long)circ->timestamp_created.tv_sec); (long)circ->timestamp_began.tv_sec);
if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */ if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */
circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ)); circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ));
} }
@ -840,7 +845,7 @@ circuit_dump_chan_details(int severity,
"state %d (%s), born %ld:", "state %d (%s), born %ld:",
chan, type, this_circid, other_circid, circ->state, chan, type, this_circid, other_circid, circ->state,
circuit_state_to_string(circ->state), circuit_state_to_string(circ->state),
(long)circ->timestamp_created.tv_sec); (long)circ->timestamp_began.tv_sec);
if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */ if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */
circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ)); circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ));
} }

View File

@ -1469,11 +1469,6 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
max_time = circuit_build_times_max(cbt); max_time = circuit_build_times_max(cbt);
/* Sometimes really fast guard nodes give us such a steep curve
* that this ends up being not that much greater than timeout_ms.
* Make it be at least 1 min to handle this case. */
cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
if (cbt->timeout_ms > max_time) { if (cbt->timeout_ms > max_time) {
log_info(LD_CIRC, log_info(LD_CIRC,
"Circuit build timeout of %dms is beyond the maximum build " "Circuit build timeout of %dms is beyond the maximum build "
@ -1490,6 +1485,11 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
cbt->close_ms = 2*max_time; cbt->close_ms = 2*max_time;
} }
/* Sometimes really fast guard nodes give us such a steep curve
* that this ends up being not that much greater than timeout_ms.
* Make it be at least 1 min to handle this case. */
cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
cbt->have_computed_timeout = 1; cbt->have_computed_timeout = 1;
return 1; return 1;
} }

View File

@ -176,6 +176,13 @@ circuit_is_better(const origin_circuit_t *oa, const origin_circuit_t *ob,
const uint8_t purpose = ENTRY_TO_CONN(conn)->purpose; const uint8_t purpose = ENTRY_TO_CONN(conn)->purpose;
int a_bits, b_bits; int a_bits, b_bits;
/* If one of the circuits was allowed to live due to relaxing its timeout,
* it is definitely worse (it's probably a much slower path). */
if (oa->relaxed_timeout && !ob->relaxed_timeout)
return 0; /* ob is better. It's not relaxed. */
if (!oa->relaxed_timeout && ob->relaxed_timeout)
return 1; /* oa is better. It's not relaxed. */
switch (purpose) { switch (purpose) {
case CIRCUIT_PURPOSE_C_GENERAL: case CIRCUIT_PURPOSE_C_GENERAL:
/* if it's used but less dirty it's best; /* if it's used but less dirty it's best;
@ -187,7 +194,7 @@ circuit_is_better(const origin_circuit_t *oa, const origin_circuit_t *ob,
return 1; return 1;
} else { } else {
if (a->timestamp_dirty || if (a->timestamp_dirty ||
timercmp(&a->timestamp_created, &b->timestamp_created, >)) timercmp(&a->timestamp_began, &b->timestamp_began, >))
return 1; return 1;
if (ob->build_state->is_internal) if (ob->build_state->is_internal)
/* XXX023 what the heck is this internal thing doing here. I /* XXX023 what the heck is this internal thing doing here. I
@ -279,7 +286,7 @@ circuit_get_best(const entry_connection_t *conn,
if (purpose == CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT && if (purpose == CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT &&
!must_be_open && circ->state != CIRCUIT_STATE_OPEN && !must_be_open && circ->state != CIRCUIT_STATE_OPEN &&
tv_mdiff(&now, &circ->timestamp_created) > circ_times.timeout_ms) { tv_mdiff(&now, &circ->timestamp_began) > circ_times.timeout_ms) {
intro_going_on_but_too_old = 1; intro_going_on_but_too_old = 1;
continue; continue;
} }
@ -365,8 +372,31 @@ circuit_expire_building(void)
const or_options_t *options = get_options(); const or_options_t *options = get_options();
struct timeval now; struct timeval now;
cpath_build_state_t *build_state; cpath_build_state_t *build_state;
int any_opened_circs = 0;
tor_gettimeofday(&now); tor_gettimeofday(&now);
/* Check to see if we have any opened circuits. If we don't,
* we want to be more lenient with timeouts, in case the
* user has relocated and/or changed network connections.
* See bug #3443. */
while (next_circ) {
if (!CIRCUIT_IS_ORIGIN(next_circ) || /* didn't originate here */
next_circ->marked_for_close) /* don't mess with marked circs */
continue;
if (TO_ORIGIN_CIRCUIT(next_circ)->has_opened &&
next_circ->state == CIRCUIT_STATE_OPEN &&
TO_ORIGIN_CIRCUIT(next_circ)->build_state &&
TO_ORIGIN_CIRCUIT(next_circ)->build_state->desired_path_len
== DEFAULT_ROUTE_LEN) {
any_opened_circs = 1;
break;
}
next_circ = next_circ->next;
}
next_circ = global_circuitlist;
#define SET_CUTOFF(target, msec) do { \ #define SET_CUTOFF(target, msec) do { \
long ms = tor_lround(msec); \ long ms = tor_lround(msec); \
struct timeval diff; \ struct timeval diff; \
@ -391,9 +421,21 @@ circuit_expire_building(void)
victim = next_circ; victim = next_circ;
next_circ = next_circ->next; next_circ = next_circ->next;
if (!CIRCUIT_IS_ORIGIN(victim) || /* didn't originate here */ if (!CIRCUIT_IS_ORIGIN(victim) || /* didn't originate here */
victim->marked_for_close) /* don't mess with marked circs */ victim->marked_for_close) /* don't mess with marked circs */
continue; continue;
/* If we haven't yet started the first hop, it means we don't have
* any orconns available, and thus have not started counting time yet
* for this circuit. See circuit_deliver_create_cell() and uses of
* timestamp_began.
*
* Continue to wait in this case. The ORConn should timeout
* independently and kill us then.
*/
if (TO_ORIGIN_CIRCUIT(victim)->cpath->state == CPATH_STATE_CLOSED) {
continue;
}
build_state = TO_ORIGIN_CIRCUIT(victim)->build_state; build_state = TO_ORIGIN_CIRCUIT(victim)->build_state;
if (build_state && build_state->onehop_tunnel) if (build_state && build_state->onehop_tunnel)
cutoff = begindir_cutoff; cutoff = begindir_cutoff;
@ -410,9 +452,40 @@ circuit_expire_building(void)
if (TO_ORIGIN_CIRCUIT(victim)->hs_circ_has_timed_out) if (TO_ORIGIN_CIRCUIT(victim)->hs_circ_has_timed_out)
cutoff = hs_extremely_old_cutoff; cutoff = hs_extremely_old_cutoff;
if (timercmp(&victim->timestamp_created, &cutoff, >)) if (timercmp(&victim->timestamp_began, &cutoff, >))
continue; /* it's still young, leave it alone */ continue; /* it's still young, leave it alone */
if (!any_opened_circs) {
/* It's still young enough that we wouldn't close it, right? */
if (timercmp(&victim->timestamp_began, &close_cutoff, >)) {
if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) {
int first_hop_succeeded = TO_ORIGIN_CIRCUIT(victim)->cpath->state
== CPATH_STATE_OPEN;
log_info(LD_CIRC,
"No circuits are opened. Relaxing timeout for "
"a circuit with channel state %s. %d guards are live.",
channel_state_to_string(victim->n_chan->state),
num_live_entry_guards());
/* We count the timeout here for CBT, because technically this
* was a timeout, and the timeout value needs to reset if we
* see enough of them. Note this means we also need to avoid
* double-counting below, too. */
circuit_build_times_count_timeout(&circ_times, first_hop_succeeded);
TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout = 1;
}
continue;
} else {
log_notice(LD_CIRC,
"No circuits are opened. Relaxed timeout for "
"a circuit with channel state %s to %ldms. "
"However, it appears the circuit has timed out anyway. "
"%d guards are live. ",
channel_state_to_string(victim->n_chan->state),
(long)circ_times.close_ms, num_live_entry_guards());
}
}
#if 0 #if 0
/* some debug logs, to help track bugs */ /* some debug logs, to help track bugs */
if (victim->purpose >= CIRCUIT_PURPOSE_C_INTRODUCING && if (victim->purpose >= CIRCUIT_PURPOSE_C_INTRODUCING &&
@ -489,9 +562,12 @@ circuit_expire_building(void)
/* Record this failure to check for too many timeouts /* Record this failure to check for too many timeouts
* in a row. This function does not record a time value yet * in a row. This function does not record a time value yet
* (we do that later); it only counts the fact that we did * (we do that later); it only counts the fact that we did
* have a timeout. */ * have a timeout. We also want to avoid double-counting
circuit_build_times_count_timeout(&circ_times, * already "relaxed" circuits, which are counted above. */
first_hop_succeeded); if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) {
circuit_build_times_count_timeout(&circ_times,
first_hop_succeeded);
}
continue; continue;
} }
@ -500,16 +576,16 @@ circuit_expire_building(void)
* it off at, we probably had a suspend event along this codepath, * it off at, we probably had a suspend event along this codepath,
* and we should discard the value. * and we should discard the value.
*/ */
if (timercmp(&victim->timestamp_created, &extremely_old_cutoff, <)) { if (timercmp(&victim->timestamp_began, &extremely_old_cutoff, <)) {
log_notice(LD_CIRC, log_notice(LD_CIRC,
"Extremely large value for circuit build timeout: %lds. " "Extremely large value for circuit build timeout: %lds. "
"Assuming clock jump. Purpose %d (%s)", "Assuming clock jump. Purpose %d (%s)",
(long)(now.tv_sec - victim->timestamp_created.tv_sec), (long)(now.tv_sec - victim->timestamp_began.tv_sec),
victim->purpose, victim->purpose,
circuit_purpose_to_string(victim->purpose)); circuit_purpose_to_string(victim->purpose));
} else if (circuit_build_times_count_close(&circ_times, } else if (circuit_build_times_count_close(&circ_times,
first_hop_succeeded, first_hop_succeeded,
victim->timestamp_created.tv_sec)) { victim->timestamp_began.tv_sec)) {
circuit_build_times_set_timeout(&circ_times); circuit_build_times_set_timeout(&circ_times);
} }
} }
@ -791,7 +867,7 @@ circuit_build_needed_circs(time_t now)
circ = circuit_get_youngest_clean_open(CIRCUIT_PURPOSE_C_GENERAL); circ = circuit_get_youngest_clean_open(CIRCUIT_PURPOSE_C_GENERAL);
if (get_options()->RunTesting && if (get_options()->RunTesting &&
circ && circ &&
circ->timestamp_created.tv_sec + TESTING_CIRCUIT_INTERVAL < now) { circ->timestamp_began.tv_sec + TESTING_CIRCUIT_INTERVAL < now) {
log_fn(LOG_INFO,"Creating a new testing circuit."); log_fn(LOG_INFO,"Creating a new testing circuit.");
circuit_launch(CIRCUIT_PURPOSE_C_GENERAL, 0); circuit_launch(CIRCUIT_PURPOSE_C_GENERAL, 0);
} }
@ -911,7 +987,7 @@ circuit_expire_old_circuits_clientside(void)
circ->purpose); circ->purpose);
circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED); circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED);
} else if (!circ->timestamp_dirty && circ->state == CIRCUIT_STATE_OPEN) { } else if (!circ->timestamp_dirty && circ->state == CIRCUIT_STATE_OPEN) {
if (timercmp(&circ->timestamp_created, &cutoff, <)) { if (timercmp(&circ->timestamp_began, &cutoff, <)) {
if (circ->purpose == CIRCUIT_PURPOSE_C_GENERAL || if (circ->purpose == CIRCUIT_PURPOSE_C_GENERAL ||
circ->purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT || circ->purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT ||
circ->purpose == CIRCUIT_PURPOSE_S_ESTABLISH_INTRO || circ->purpose == CIRCUIT_PURPOSE_S_ESTABLISH_INTRO ||
@ -921,7 +997,7 @@ circuit_expire_old_circuits_clientside(void)
circ->purpose == CIRCUIT_PURPOSE_S_CONNECT_REND) { circ->purpose == CIRCUIT_PURPOSE_S_CONNECT_REND) {
log_debug(LD_CIRC, log_debug(LD_CIRC,
"Closing circuit that has been unused for %ld msec.", "Closing circuit that has been unused for %ld msec.",
tv_mdiff(&circ->timestamp_created, &now)); tv_mdiff(&circ->timestamp_began, &now));
circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED); circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED);
} else if (!TO_ORIGIN_CIRCUIT(circ)->is_ancient) { } else if (!TO_ORIGIN_CIRCUIT(circ)->is_ancient) {
/* Server-side rend joined circuits can end up really old, because /* Server-side rend joined circuits can end up really old, because
@ -935,7 +1011,7 @@ circuit_expire_old_circuits_clientside(void)
"Ancient non-dirty circuit %d is still around after " "Ancient non-dirty circuit %d is still around after "
"%ld milliseconds. Purpose: %d (%s)", "%ld milliseconds. Purpose: %d (%s)",
TO_ORIGIN_CIRCUIT(circ)->global_identifier, TO_ORIGIN_CIRCUIT(circ)->global_identifier,
tv_mdiff(&circ->timestamp_created, &now), tv_mdiff(&circ->timestamp_began, &now),
circ->purpose, circ->purpose,
circuit_purpose_to_string(circ->purpose)); circuit_purpose_to_string(circ->purpose));
TO_ORIGIN_CIRCUIT(circ)->is_ancient = 1; TO_ORIGIN_CIRCUIT(circ)->is_ancient = 1;
@ -1316,20 +1392,24 @@ circuit_launch_by_extend_info(uint8_t purpose,
circ = circuit_find_to_cannibalize(purpose, extend_info, flags); circ = circuit_find_to_cannibalize(purpose, extend_info, flags);
if (circ) { if (circ) {
uint8_t old_purpose = circ->base_.purpose; uint8_t old_purpose = circ->base_.purpose;
struct timeval old_timestamp_created; struct timeval old_timestamp_began;
log_info(LD_CIRC,"Cannibalizing circ '%s' for purpose %d (%s)", log_info(LD_CIRC,"Cannibalizing circ '%s' for purpose %d (%s)",
build_state_get_exit_nickname(circ->build_state), purpose, build_state_get_exit_nickname(circ->build_state), purpose,
circuit_purpose_to_string(purpose)); circuit_purpose_to_string(purpose));
circuit_change_purpose(TO_CIRCUIT(circ), purpose); circuit_change_purpose(TO_CIRCUIT(circ), purpose);
/* reset the birth date of this circ, else expire_building /* Reset the start date of this circ, else expire_building
* will see it and think it's been trying to build since it * will see it and think it's been trying to build since it
* began. */ * began.
tor_gettimeofday(&circ->base_.timestamp_created); *
* Technically, the code should reset this when the
* create cell is finally sent, but we're close enough
* here. */
tor_gettimeofday(&circ->base_.timestamp_began);
control_event_circuit_cannibalized(circ, old_purpose, control_event_circuit_cannibalized(circ, old_purpose,
&old_timestamp_created); &old_timestamp_began);
switch (purpose) { switch (purpose) {
case CIRCUIT_PURPOSE_C_ESTABLISH_REND: case CIRCUIT_PURPOSE_C_ESTABLISH_REND:

View File

@ -3578,9 +3578,9 @@ control_event_circuit_status_minor(origin_circuit_t *circ,
/* event_tail can currently be up to 130 chars long */ /* event_tail can currently be up to 130 chars long */
const char *hs_state_str = const char *hs_state_str =
circuit_purpose_to_controller_hs_state_string(purpose); circuit_purpose_to_controller_hs_state_string(purpose);
const struct timeval *old_timestamp_created = tv; const struct timeval *old_timestamp_began = tv;
char tbuf[ISO_TIME_USEC_LEN+1]; char tbuf[ISO_TIME_USEC_LEN+1];
format_iso_time_nospace_usec(tbuf, old_timestamp_created); format_iso_time_nospace_usec(tbuf, old_timestamp_began);
tor_snprintf(event_tail, sizeof(event_tail), tor_snprintf(event_tail, sizeof(event_tail),
" OLD_PURPOSE=%s%s%s OLD_TIME_CREATED=%s", " OLD_PURPOSE=%s%s%s OLD_TIME_CREATED=%s",

View File

@ -219,7 +219,7 @@ entry_is_live(entry_guard_t *e, int need_uptime, int need_capacity,
} }
/** Return the number of entry guards that we think are usable. */ /** Return the number of entry guards that we think are usable. */
static int int
num_live_entry_guards(void) num_live_entry_guards(void)
{ {
int n = 0; int n = 0;

View File

@ -52,6 +52,7 @@ typedef struct entry_guard_t {
entry_guard_t *entry_guard_get_by_id_digest(const char *digest); entry_guard_t *entry_guard_get_by_id_digest(const char *digest);
void entry_guards_changed(void); void entry_guards_changed(void);
const smartlist_t *get_entry_guards(void); const smartlist_t *get_entry_guards(void);
int num_live_entry_guards(void);
#endif #endif

View File

@ -2702,10 +2702,21 @@ typedef struct circuit_t {
* length ONIONSKIN_CHALLENGE_LEN. */ * length ONIONSKIN_CHALLENGE_LEN. */
char *n_chan_onionskin; char *n_chan_onionskin;
/** When was this circuit created? We keep this timestamp with a higher /** When did circuit construction actually begin (ie send the
* resolution than most so that the circuit-build-time tracking code can * CREATE cell or begin cannibalization).
* get millisecond resolution. */ *
* Note: This timer will get reset if we decide to cannibalize
* a circuit. It may also get reset during certain phases of hidden
* service circuit use.
*
* We keep this timestamp with a higher resolution than most so that the
* circuit-build-time tracking code can get millisecond resolution.
*/
struct timeval timestamp_began;
/** This timestamp marks when the init_circuit_base constructor ran. */
struct timeval timestamp_created; struct timeval timestamp_created;
/** When the circuit was first used, or 0 if the circuit is clean. /** When the circuit was first used, or 0 if the circuit is clean.
* *
* XXXX023 Note that some code will artifically adjust this value backward * XXXX023 Note that some code will artifically adjust this value backward
@ -2815,6 +2826,10 @@ typedef struct origin_circuit_t {
* service-side introduction circuits never have this flag set.) */ * service-side introduction circuits never have this flag set.) */
unsigned int hs_circ_has_timed_out : 1; unsigned int hs_circ_has_timed_out : 1;
/** Set iff this circuit has been given a relaxed timeout because
* no circuits have opened. Used to prevent spamming logs. */
unsigned int relaxed_timeout : 1;
/** Set iff this is a service-side rendezvous circuit for which a /** Set iff this is a service-side rendezvous circuit for which a
* new connection attempt has been launched. We consider launching * new connection attempt has been launched. We consider launching
* a new service-side rend circ to a client when the previous one * a new service-side rend circ to a client when the previous one