mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-11 05:33:47 +01:00
Bug 3443: Don't count ORconn setup in circuit build time.
Also, add a hack Roger suggested where we're more patient if no circuits are opened yet.
This commit is contained in:
parent
bd93ff8dd7
commit
42e3c04a7a
11
changes/bug3443
Normal file
11
changes/bug3443
Normal file
@ -0,0 +1,11 @@
|
||||
o Minor bugfixes
|
||||
- Alter circuit build timeout measurement to start at the point
|
||||
where we begin the CREATE/CREATE_FAST step (as opposed to circuit
|
||||
initialization). This should make our timeout measurements more
|
||||
uniform. Previously, we were sometimes including ORconn setup time
|
||||
in our circuit build time measurements. Fixes bug #3443.
|
||||
|
||||
o Minor features
|
||||
- If we have no circuits open, use a relaxed timeout (the 95-percentile
|
||||
cutoff) until a circuit succeeds. This should allow Tor to succeed
|
||||
building circuits if the network connection drastically changes.
|
@ -522,6 +522,18 @@ circuit_deliver_create_cell(circuit_t *circ, uint8_t cell_type,
|
||||
CELL_DIRECTION_OUT, 0);
|
||||
|
||||
if (CIRCUIT_IS_ORIGIN(circ)) {
|
||||
/* Update began timestamp for circuits starting their first hop */
|
||||
if (TO_ORIGIN_CIRCUIT(circ)->cpath->state == CPATH_STATE_CLOSED) {
|
||||
if (circ->n_chan->state != CHANNEL_STATE_OPEN) {
|
||||
log_warn(LD_CIRC,
|
||||
"Got first hop for a circuit without an opened channel. "
|
||||
"State: %s.", channel_state_to_string(circ->n_chan->state));
|
||||
tor_fragile_assert();
|
||||
}
|
||||
|
||||
tor_gettimeofday(&circ->timestamp_began);
|
||||
}
|
||||
|
||||
/* mark it so it gets better rate limiting treatment. */
|
||||
channel_timestamp_client(circ->n_chan);
|
||||
}
|
||||
@ -672,7 +684,7 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
|
||||
struct timeval end;
|
||||
long timediff;
|
||||
tor_gettimeofday(&end);
|
||||
timediff = tv_mdiff(&circ->base_.timestamp_created, &end);
|
||||
timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
|
||||
|
||||
/*
|
||||
* If the circuit build time is much greater than we would have cut
|
||||
|
@ -555,6 +555,11 @@ init_circuit_base(circuit_t *circ)
|
||||
{
|
||||
tor_gettimeofday(&circ->timestamp_created);
|
||||
|
||||
// Gets reset when we send CREATE_FAST.
|
||||
// circuit_expire_building() expects these to be equal
|
||||
// until the orconn is built.
|
||||
circ->timestamp_began = circ->timestamp_created;
|
||||
|
||||
circ->package_window = circuit_initial_package_window();
|
||||
circ->deliver_window = CIRCWINDOW_START;
|
||||
|
||||
@ -777,7 +782,7 @@ circuit_dump_conn_details(int severity,
|
||||
"state %d (%s), born %ld:",
|
||||
conn_array_index, type, this_circid, other_circid, circ->state,
|
||||
circuit_state_to_string(circ->state),
|
||||
(long)circ->timestamp_created.tv_sec);
|
||||
(long)circ->timestamp_began.tv_sec);
|
||||
if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */
|
||||
circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ));
|
||||
}
|
||||
@ -840,7 +845,7 @@ circuit_dump_chan_details(int severity,
|
||||
"state %d (%s), born %ld:",
|
||||
chan, type, this_circid, other_circid, circ->state,
|
||||
circuit_state_to_string(circ->state),
|
||||
(long)circ->timestamp_created.tv_sec);
|
||||
(long)circ->timestamp_began.tv_sec);
|
||||
if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */
|
||||
circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ));
|
||||
}
|
||||
|
@ -1469,11 +1469,6 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
|
||||
|
||||
max_time = circuit_build_times_max(cbt);
|
||||
|
||||
/* Sometimes really fast guard nodes give us such a steep curve
|
||||
* that this ends up being not that much greater than timeout_ms.
|
||||
* Make it be at least 1 min to handle this case. */
|
||||
cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
|
||||
|
||||
if (cbt->timeout_ms > max_time) {
|
||||
log_info(LD_CIRC,
|
||||
"Circuit build timeout of %dms is beyond the maximum build "
|
||||
@ -1490,6 +1485,11 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
|
||||
cbt->close_ms = 2*max_time;
|
||||
}
|
||||
|
||||
/* Sometimes really fast guard nodes give us such a steep curve
|
||||
* that this ends up being not that much greater than timeout_ms.
|
||||
* Make it be at least 1 min to handle this case. */
|
||||
cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
|
||||
|
||||
cbt->have_computed_timeout = 1;
|
||||
return 1;
|
||||
}
|
||||
|
@ -176,6 +176,13 @@ circuit_is_better(const origin_circuit_t *oa, const origin_circuit_t *ob,
|
||||
const uint8_t purpose = ENTRY_TO_CONN(conn)->purpose;
|
||||
int a_bits, b_bits;
|
||||
|
||||
/* If one of the circuits was allowed to live due to relaxing its timeout,
|
||||
* it is definitely worse (it's probably a much slower path). */
|
||||
if (oa->relaxed_timeout && !ob->relaxed_timeout)
|
||||
return 0; /* ob is better. It's not relaxed. */
|
||||
if (!oa->relaxed_timeout && ob->relaxed_timeout)
|
||||
return 1; /* oa is better. It's not relaxed. */
|
||||
|
||||
switch (purpose) {
|
||||
case CIRCUIT_PURPOSE_C_GENERAL:
|
||||
/* if it's used but less dirty it's best;
|
||||
@ -187,7 +194,7 @@ circuit_is_better(const origin_circuit_t *oa, const origin_circuit_t *ob,
|
||||
return 1;
|
||||
} else {
|
||||
if (a->timestamp_dirty ||
|
||||
timercmp(&a->timestamp_created, &b->timestamp_created, >))
|
||||
timercmp(&a->timestamp_began, &b->timestamp_began, >))
|
||||
return 1;
|
||||
if (ob->build_state->is_internal)
|
||||
/* XXX023 what the heck is this internal thing doing here. I
|
||||
@ -279,7 +286,7 @@ circuit_get_best(const entry_connection_t *conn,
|
||||
|
||||
if (purpose == CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT &&
|
||||
!must_be_open && circ->state != CIRCUIT_STATE_OPEN &&
|
||||
tv_mdiff(&now, &circ->timestamp_created) > circ_times.timeout_ms) {
|
||||
tv_mdiff(&now, &circ->timestamp_began) > circ_times.timeout_ms) {
|
||||
intro_going_on_but_too_old = 1;
|
||||
continue;
|
||||
}
|
||||
@ -365,8 +372,31 @@ circuit_expire_building(void)
|
||||
const or_options_t *options = get_options();
|
||||
struct timeval now;
|
||||
cpath_build_state_t *build_state;
|
||||
int any_opened_circs = 0;
|
||||
|
||||
tor_gettimeofday(&now);
|
||||
|
||||
/* Check to see if we have any opened circuits. If we don't,
|
||||
* we want to be more lenient with timeouts, in case the
|
||||
* user has relocated and/or changed network connections.
|
||||
* See bug #3443. */
|
||||
while (next_circ) {
|
||||
if (!CIRCUIT_IS_ORIGIN(next_circ) || /* didn't originate here */
|
||||
next_circ->marked_for_close) /* don't mess with marked circs */
|
||||
continue;
|
||||
|
||||
if (TO_ORIGIN_CIRCUIT(next_circ)->has_opened &&
|
||||
next_circ->state == CIRCUIT_STATE_OPEN &&
|
||||
TO_ORIGIN_CIRCUIT(next_circ)->build_state &&
|
||||
TO_ORIGIN_CIRCUIT(next_circ)->build_state->desired_path_len
|
||||
== DEFAULT_ROUTE_LEN) {
|
||||
any_opened_circs = 1;
|
||||
break;
|
||||
}
|
||||
next_circ = next_circ->next;
|
||||
}
|
||||
next_circ = global_circuitlist;
|
||||
|
||||
#define SET_CUTOFF(target, msec) do { \
|
||||
long ms = tor_lround(msec); \
|
||||
struct timeval diff; \
|
||||
@ -391,9 +421,21 @@ circuit_expire_building(void)
|
||||
victim = next_circ;
|
||||
next_circ = next_circ->next;
|
||||
if (!CIRCUIT_IS_ORIGIN(victim) || /* didn't originate here */
|
||||
victim->marked_for_close) /* don't mess with marked circs */
|
||||
victim->marked_for_close) /* don't mess with marked circs */
|
||||
continue;
|
||||
|
||||
/* If we haven't yet started the first hop, it means we don't have
|
||||
* any orconns available, and thus have not started counting time yet
|
||||
* for this circuit. See circuit_deliver_create_cell() and uses of
|
||||
* timestamp_began.
|
||||
*
|
||||
* Continue to wait in this case. The ORConn should timeout
|
||||
* independently and kill us then.
|
||||
*/
|
||||
if (TO_ORIGIN_CIRCUIT(victim)->cpath->state == CPATH_STATE_CLOSED) {
|
||||
continue;
|
||||
}
|
||||
|
||||
build_state = TO_ORIGIN_CIRCUIT(victim)->build_state;
|
||||
if (build_state && build_state->onehop_tunnel)
|
||||
cutoff = begindir_cutoff;
|
||||
@ -410,9 +452,40 @@ circuit_expire_building(void)
|
||||
if (TO_ORIGIN_CIRCUIT(victim)->hs_circ_has_timed_out)
|
||||
cutoff = hs_extremely_old_cutoff;
|
||||
|
||||
if (timercmp(&victim->timestamp_created, &cutoff, >))
|
||||
if (timercmp(&victim->timestamp_began, &cutoff, >))
|
||||
continue; /* it's still young, leave it alone */
|
||||
|
||||
if (!any_opened_circs) {
|
||||
/* It's still young enough that we wouldn't close it, right? */
|
||||
if (timercmp(&victim->timestamp_began, &close_cutoff, >)) {
|
||||
if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) {
|
||||
int first_hop_succeeded = TO_ORIGIN_CIRCUIT(victim)->cpath->state
|
||||
== CPATH_STATE_OPEN;
|
||||
log_info(LD_CIRC,
|
||||
"No circuits are opened. Relaxing timeout for "
|
||||
"a circuit with channel state %s. %d guards are live.",
|
||||
channel_state_to_string(victim->n_chan->state),
|
||||
num_live_entry_guards());
|
||||
|
||||
/* We count the timeout here for CBT, because technically this
|
||||
* was a timeout, and the timeout value needs to reset if we
|
||||
* see enough of them. Note this means we also need to avoid
|
||||
* double-counting below, too. */
|
||||
circuit_build_times_count_timeout(&circ_times, first_hop_succeeded);
|
||||
TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout = 1;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
log_notice(LD_CIRC,
|
||||
"No circuits are opened. Relaxed timeout for "
|
||||
"a circuit with channel state %s to %ldms. "
|
||||
"However, it appears the circuit has timed out anyway. "
|
||||
"%d guards are live. ",
|
||||
channel_state_to_string(victim->n_chan->state),
|
||||
(long)circ_times.close_ms, num_live_entry_guards());
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* some debug logs, to help track bugs */
|
||||
if (victim->purpose >= CIRCUIT_PURPOSE_C_INTRODUCING &&
|
||||
@ -489,9 +562,12 @@ circuit_expire_building(void)
|
||||
/* Record this failure to check for too many timeouts
|
||||
* in a row. This function does not record a time value yet
|
||||
* (we do that later); it only counts the fact that we did
|
||||
* have a timeout. */
|
||||
circuit_build_times_count_timeout(&circ_times,
|
||||
first_hop_succeeded);
|
||||
* have a timeout. We also want to avoid double-counting
|
||||
* already "relaxed" circuits, which are counted above. */
|
||||
if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) {
|
||||
circuit_build_times_count_timeout(&circ_times,
|
||||
first_hop_succeeded);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -500,16 +576,16 @@ circuit_expire_building(void)
|
||||
* it off at, we probably had a suspend event along this codepath,
|
||||
* and we should discard the value.
|
||||
*/
|
||||
if (timercmp(&victim->timestamp_created, &extremely_old_cutoff, <)) {
|
||||
if (timercmp(&victim->timestamp_began, &extremely_old_cutoff, <)) {
|
||||
log_notice(LD_CIRC,
|
||||
"Extremely large value for circuit build timeout: %lds. "
|
||||
"Assuming clock jump. Purpose %d (%s)",
|
||||
(long)(now.tv_sec - victim->timestamp_created.tv_sec),
|
||||
(long)(now.tv_sec - victim->timestamp_began.tv_sec),
|
||||
victim->purpose,
|
||||
circuit_purpose_to_string(victim->purpose));
|
||||
} else if (circuit_build_times_count_close(&circ_times,
|
||||
first_hop_succeeded,
|
||||
victim->timestamp_created.tv_sec)) {
|
||||
victim->timestamp_began.tv_sec)) {
|
||||
circuit_build_times_set_timeout(&circ_times);
|
||||
}
|
||||
}
|
||||
@ -791,7 +867,7 @@ circuit_build_needed_circs(time_t now)
|
||||
circ = circuit_get_youngest_clean_open(CIRCUIT_PURPOSE_C_GENERAL);
|
||||
if (get_options()->RunTesting &&
|
||||
circ &&
|
||||
circ->timestamp_created.tv_sec + TESTING_CIRCUIT_INTERVAL < now) {
|
||||
circ->timestamp_began.tv_sec + TESTING_CIRCUIT_INTERVAL < now) {
|
||||
log_fn(LOG_INFO,"Creating a new testing circuit.");
|
||||
circuit_launch(CIRCUIT_PURPOSE_C_GENERAL, 0);
|
||||
}
|
||||
@ -911,7 +987,7 @@ circuit_expire_old_circuits_clientside(void)
|
||||
circ->purpose);
|
||||
circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED);
|
||||
} else if (!circ->timestamp_dirty && circ->state == CIRCUIT_STATE_OPEN) {
|
||||
if (timercmp(&circ->timestamp_created, &cutoff, <)) {
|
||||
if (timercmp(&circ->timestamp_began, &cutoff, <)) {
|
||||
if (circ->purpose == CIRCUIT_PURPOSE_C_GENERAL ||
|
||||
circ->purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT ||
|
||||
circ->purpose == CIRCUIT_PURPOSE_S_ESTABLISH_INTRO ||
|
||||
@ -921,7 +997,7 @@ circuit_expire_old_circuits_clientside(void)
|
||||
circ->purpose == CIRCUIT_PURPOSE_S_CONNECT_REND) {
|
||||
log_debug(LD_CIRC,
|
||||
"Closing circuit that has been unused for %ld msec.",
|
||||
tv_mdiff(&circ->timestamp_created, &now));
|
||||
tv_mdiff(&circ->timestamp_began, &now));
|
||||
circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED);
|
||||
} else if (!TO_ORIGIN_CIRCUIT(circ)->is_ancient) {
|
||||
/* Server-side rend joined circuits can end up really old, because
|
||||
@ -935,7 +1011,7 @@ circuit_expire_old_circuits_clientside(void)
|
||||
"Ancient non-dirty circuit %d is still around after "
|
||||
"%ld milliseconds. Purpose: %d (%s)",
|
||||
TO_ORIGIN_CIRCUIT(circ)->global_identifier,
|
||||
tv_mdiff(&circ->timestamp_created, &now),
|
||||
tv_mdiff(&circ->timestamp_began, &now),
|
||||
circ->purpose,
|
||||
circuit_purpose_to_string(circ->purpose));
|
||||
TO_ORIGIN_CIRCUIT(circ)->is_ancient = 1;
|
||||
@ -1316,20 +1392,24 @@ circuit_launch_by_extend_info(uint8_t purpose,
|
||||
circ = circuit_find_to_cannibalize(purpose, extend_info, flags);
|
||||
if (circ) {
|
||||
uint8_t old_purpose = circ->base_.purpose;
|
||||
struct timeval old_timestamp_created;
|
||||
struct timeval old_timestamp_began;
|
||||
|
||||
log_info(LD_CIRC,"Cannibalizing circ '%s' for purpose %d (%s)",
|
||||
build_state_get_exit_nickname(circ->build_state), purpose,
|
||||
circuit_purpose_to_string(purpose));
|
||||
|
||||
circuit_change_purpose(TO_CIRCUIT(circ), purpose);
|
||||
/* reset the birth date of this circ, else expire_building
|
||||
/* Reset the start date of this circ, else expire_building
|
||||
* will see it and think it's been trying to build since it
|
||||
* began. */
|
||||
tor_gettimeofday(&circ->base_.timestamp_created);
|
||||
* began.
|
||||
*
|
||||
* Technically, the code should reset this when the
|
||||
* create cell is finally sent, but we're close enough
|
||||
* here. */
|
||||
tor_gettimeofday(&circ->base_.timestamp_began);
|
||||
|
||||
control_event_circuit_cannibalized(circ, old_purpose,
|
||||
&old_timestamp_created);
|
||||
&old_timestamp_began);
|
||||
|
||||
switch (purpose) {
|
||||
case CIRCUIT_PURPOSE_C_ESTABLISH_REND:
|
||||
|
@ -3578,9 +3578,9 @@ control_event_circuit_status_minor(origin_circuit_t *circ,
|
||||
/* event_tail can currently be up to 130 chars long */
|
||||
const char *hs_state_str =
|
||||
circuit_purpose_to_controller_hs_state_string(purpose);
|
||||
const struct timeval *old_timestamp_created = tv;
|
||||
const struct timeval *old_timestamp_began = tv;
|
||||
char tbuf[ISO_TIME_USEC_LEN+1];
|
||||
format_iso_time_nospace_usec(tbuf, old_timestamp_created);
|
||||
format_iso_time_nospace_usec(tbuf, old_timestamp_began);
|
||||
|
||||
tor_snprintf(event_tail, sizeof(event_tail),
|
||||
" OLD_PURPOSE=%s%s%s OLD_TIME_CREATED=%s",
|
||||
|
@ -219,7 +219,7 @@ entry_is_live(entry_guard_t *e, int need_uptime, int need_capacity,
|
||||
}
|
||||
|
||||
/** Return the number of entry guards that we think are usable. */
|
||||
static int
|
||||
int
|
||||
num_live_entry_guards(void)
|
||||
{
|
||||
int n = 0;
|
||||
|
@ -52,6 +52,7 @@ typedef struct entry_guard_t {
|
||||
entry_guard_t *entry_guard_get_by_id_digest(const char *digest);
|
||||
void entry_guards_changed(void);
|
||||
const smartlist_t *get_entry_guards(void);
|
||||
int num_live_entry_guards(void);
|
||||
|
||||
#endif
|
||||
|
||||
|
21
src/or/or.h
21
src/or/or.h
@ -2702,10 +2702,21 @@ typedef struct circuit_t {
|
||||
* length ONIONSKIN_CHALLENGE_LEN. */
|
||||
char *n_chan_onionskin;
|
||||
|
||||
/** When was this circuit created? We keep this timestamp with a higher
|
||||
* resolution than most so that the circuit-build-time tracking code can
|
||||
* get millisecond resolution. */
|
||||
/** When did circuit construction actually begin (ie send the
|
||||
* CREATE cell or begin cannibalization).
|
||||
*
|
||||
* Note: This timer will get reset if we decide to cannibalize
|
||||
* a circuit. It may also get reset during certain phases of hidden
|
||||
* service circuit use.
|
||||
*
|
||||
* We keep this timestamp with a higher resolution than most so that the
|
||||
* circuit-build-time tracking code can get millisecond resolution.
|
||||
*/
|
||||
struct timeval timestamp_began;
|
||||
|
||||
/** This timestamp marks when the init_circuit_base constructor ran. */
|
||||
struct timeval timestamp_created;
|
||||
|
||||
/** When the circuit was first used, or 0 if the circuit is clean.
|
||||
*
|
||||
* XXXX023 Note that some code will artifically adjust this value backward
|
||||
@ -2815,6 +2826,10 @@ typedef struct origin_circuit_t {
|
||||
* service-side introduction circuits never have this flag set.) */
|
||||
unsigned int hs_circ_has_timed_out : 1;
|
||||
|
||||
/** Set iff this circuit has been given a relaxed timeout because
|
||||
* no circuits have opened. Used to prevent spamming logs. */
|
||||
unsigned int relaxed_timeout : 1;
|
||||
|
||||
/** Set iff this is a service-side rendezvous circuit for which a
|
||||
* new connection attempt has been launched. We consider launching
|
||||
* a new service-side rend circ to a client when the previous one
|
||||
|
Loading…
Reference in New Issue
Block a user