Merge branch 'bug1772' into maint-0.2.2

This commit is contained in:
Roger Dingledine 2010-09-29 23:52:18 -04:00
commit 3cbe463e96
6 changed files with 110 additions and 122 deletions

11
changes/bug1772 Normal file
View File

@ -0,0 +1,11 @@
o Minor bugfixes:
- Simplify the logic that causes us to decide if the network is unavailable
for purposes of recording circuit build times. If we receive no cells
whatsoever for the entire duration of a circuit's full measured lifetime,
the network is probably down. This should hopefully reduce some of the
cases where we see ridiculous circuit build timeouts for people with spotty
wireless connections. Fixes bug 1772; bugfix on 0.2.2.2-alpha.
- Prevent the circuit build timeout from becoming larger than the maximum
build time we have ever seen. Also, prevent the measurement time period
from becoming larger than twice that value. Fixes bug 1772; bugfix on
0.2.2.2-alpha

View File

@ -149,6 +149,14 @@ circuit_build_times_min_circs_to_observe(void)
return num; return num;
} }
/** Return true iff <b>cbt</b> has recorded enough build times that we
* want to start acting on the timeout it implies. */
int
circuit_build_times_enough_to_compute(circuit_build_times_t *cbt)
{
return cbt->total_build_times >= circuit_build_times_min_circs_to_observe();
}
double double
circuit_build_times_quantile_cutoff(void) circuit_build_times_quantile_cutoff(void)
{ {
@ -292,8 +300,8 @@ circuit_build_times_reset(circuit_build_times_t *cbt)
/** /**
* Initialize the buildtimes structure for first use. * Initialize the buildtimes structure for first use.
* *
* Sets the initial timeout value based to either the * Sets the initial timeout values based on either the config setting,
* config setting or BUILD_TIMEOUT_INITIAL_VALUE. * the consensus param, or the default (CBT_DEFAULT_TIMEOUT_INITIAL_VALUE).
*/ */
void void
circuit_build_times_init(circuit_build_times_t *cbt) circuit_build_times_init(circuit_build_times_t *cbt)
@ -306,6 +314,7 @@ circuit_build_times_init(circuit_build_times_t *cbt)
control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET); control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
} }
#if 0
/** /**
* Rewind our build time history by n positions. * Rewind our build time history by n positions.
*/ */
@ -332,6 +341,7 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
"Rewound history by %d places. Current index: %d. " "Rewound history by %d places. Current index: %d. "
"Total: %d", n, cbt->build_times_idx, cbt->total_build_times); "Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
} }
#endif
/** /**
* Add a new build time value <b>time</b> to the set of build times. Time * Add a new build time value <b>time</b> to the set of build times. Time
@ -916,9 +926,7 @@ int
circuit_build_times_needs_circuits(circuit_build_times_t *cbt) circuit_build_times_needs_circuits(circuit_build_times_t *cbt)
{ {
/* Return true if < MIN_CIRCUITS_TO_OBSERVE */ /* Return true if < MIN_CIRCUITS_TO_OBSERVE */
if (cbt->total_build_times < circuit_build_times_min_circs_to_observe()) return !circuit_build_times_enough_to_compute(cbt);
return 1;
return 0;
} }
/** /**
@ -933,7 +941,11 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
} }
/** /**
* Called to indicate that the network showed some signs of liveness. * Called to indicate that the network showed some signs of liveness,
* i.e. we received a cell.
*
* This is used by circuit_build_times_network_check_live() to decide
* if we should record the circuit build timeout or not.
* *
* This function is called every time we receive a cell. Avoid * This function is called every time we receive a cell. Avoid
* syscalls, events, and other high-intensity work. * syscalls, events, and other high-intensity work.
@ -941,14 +953,26 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
void void
circuit_build_times_network_is_live(circuit_build_times_t *cbt) circuit_build_times_network_is_live(circuit_build_times_t *cbt)
{ {
cbt->liveness.network_last_live = approx_time(); time_t now = approx_time();
cbt->liveness.nonlive_discarded = 0; if (cbt->liveness.nonlive_timeouts > 0) {
log_notice(LD_CIRC,
"Tor now sees network activity. Restoring circuit build "
"timeout recording. Network was down for %d seconds "
"during %d circuit attempts.",
(int)(now - cbt->liveness.network_last_live),
cbt->liveness.nonlive_timeouts);
}
cbt->liveness.network_last_live = now;
cbt->liveness.nonlive_timeouts = 0; cbt->liveness.nonlive_timeouts = 0;
} }
/** /**
* Called to indicate that we completed a circuit. Because this circuit * Called to indicate that we completed a circuit. Because this circuit
* succeeded, it doesn't count as a timeout-after-the-first-hop. * succeeded, it doesn't count as a timeout-after-the-first-hop.
*
* This is used by circuit_build_times_network_check_changed() to determine
* if we had too many recent timeouts and need to reset our learned timeout
* to something higher.
*/ */
void void
circuit_build_times_network_circ_success(circuit_build_times_t *cbt) circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
@ -961,6 +985,10 @@ circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
/** /**
* A circuit just timed out. If it failed after the first hop, record it * A circuit just timed out. If it failed after the first hop, record it
* in our history for later deciding if the network speed has changed. * in our history for later deciding if the network speed has changed.
*
* This is used by circuit_build_times_network_check_changed() to determine
* if we had too many recent timeouts and need to reset our learned timeout
* to something higher.
*/ */
static void static void
circuit_build_times_network_timeout(circuit_build_times_t *cbt, circuit_build_times_network_timeout(circuit_build_times_t *cbt,
@ -977,6 +1005,9 @@ circuit_build_times_network_timeout(circuit_build_times_t *cbt,
* A circuit was just forcibly closed. If there has been no recent network * A circuit was just forcibly closed. If there has been no recent network
* activity at all, but this circuit was launched back when we thought the * activity at all, but this circuit was launched back when we thought the
* network was live, increment the number of "nonlive" circuit timeouts. * network was live, increment the number of "nonlive" circuit timeouts.
*
* This is used by circuit_build_times_network_check_live() to decide
* if we should record the circuit build timeout or not.
*/ */
static void static void
circuit_build_times_network_close(circuit_build_times_t *cbt, circuit_build_times_network_close(circuit_build_times_t *cbt,
@ -986,15 +1017,8 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
/* /*
* Check if this is a timeout that was for a circuit that spent its * Check if this is a timeout that was for a circuit that spent its
* entire existence during a time where we have had no network activity. * entire existence during a time where we have had no network activity.
*
* Also double check that it is a valid timeout after we have possibly
* just recently reset cbt->close_ms.
*
* We use close_ms here because timeouts aren't actually counted as timeouts
* until close_ms elapses.
*/ */
if (cbt->liveness.network_last_live <= start_time && if (cbt->liveness.network_last_live < start_time) {
start_time <= (now - cbt->close_ms/1000.0)) {
if (did_onehop) { if (did_onehop) {
char last_live_buf[ISO_TIME_LEN+1]; char last_live_buf[ISO_TIME_LEN+1];
char start_time_buf[ISO_TIME_LEN+1]; char start_time_buf[ISO_TIME_LEN+1];
@ -1009,12 +1033,25 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
now_buf); now_buf);
} }
cbt->liveness.nonlive_timeouts++; cbt->liveness.nonlive_timeouts++;
if (cbt->liveness.nonlive_timeouts == 1) {
log_notice(LD_CIRC,
"Tor has not observed any network activity for the past %d "
"seconds. Disabling circuit build timeout code.",
(int)(now - cbt->liveness.network_last_live));
} else {
log_info(LD_CIRC,
"Got non-live timeout. Current count is: %d",
cbt->liveness.nonlive_timeouts);
}
} }
} }
/** /**
* Returns false if the network has not received a cell or tls handshake * When the network is not live, we do not record circuit build times.
* in the past NETWORK_NOTLIVE_TIMEOUT_COUNT circuits. *
* The network is considered not live if there has been at least one
* circuit build that began and ended (had its close_ms measurement
* period expire) since we last received a cell.
* *
* Also has the side effect of rewinding the circuit time history * Also has the side effect of rewinding the circuit time history
* in the case of recent liveness changes. * in the case of recent liveness changes.
@ -1022,45 +1059,8 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
int int
circuit_build_times_network_check_live(circuit_build_times_t *cbt) circuit_build_times_network_check_live(circuit_build_times_t *cbt)
{ {
time_t now = approx_time(); if (cbt->liveness.nonlive_timeouts > 0) {
if (cbt->liveness.nonlive_timeouts >= CBT_NETWORK_NONLIVE_DISCARD_COUNT) {
if (!cbt->liveness.nonlive_discarded) {
cbt->liveness.nonlive_discarded = 1;
log_notice(LD_CIRC, "Network is no longer live (too many recent "
"circuit timeouts). Dead for %ld seconds.",
(long int)(now - cbt->liveness.network_last_live));
/* Only discard NETWORK_NONLIVE_TIMEOUT_COUNT-1 because we stopped
* counting after that */
circuit_build_times_rewind_history(cbt,
CBT_NETWORK_NONLIVE_TIMEOUT_COUNT-1);
control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_DISCARD);
}
return 0; return 0;
} else if (cbt->liveness.nonlive_timeouts >=
CBT_NETWORK_NONLIVE_TIMEOUT_COUNT) {
if (cbt->timeout_ms < circuit_build_times_get_initial_timeout()) {
log_notice(LD_CIRC,
"Network is flaky. No activity for %ld seconds. "
"Temporarily raising timeout to %lds.",
(long int)(now - cbt->liveness.network_last_live),
tor_lround(circuit_build_times_get_initial_timeout()/1000));
cbt->liveness.suspended_timeout = cbt->timeout_ms;
cbt->liveness.suspended_close_timeout = cbt->close_ms;
cbt->close_ms = cbt->timeout_ms
= circuit_build_times_get_initial_timeout();
control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_SUSPENDED);
}
return 0;
} else if (cbt->liveness.suspended_timeout > 0) {
log_notice(LD_CIRC,
"Network activity has resumed. "
"Resuming circuit timeout calculations.");
cbt->timeout_ms = cbt->liveness.suspended_timeout;
cbt->close_ms = cbt->liveness.suspended_close_timeout;
cbt->liveness.suspended_timeout = 0;
cbt->liveness.suspended_close_timeout = 0;
control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESUME);
} }
return 1; return 1;
@ -1069,7 +1069,8 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt)
/** /**
* Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of * Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of
* the past RECENT_CIRCUITS time out after the first hop. Used to detect * the past RECENT_CIRCUITS time out after the first hop. Used to detect
* if the network connection has changed significantly. * if the network connection has changed significantly, and if so,
* resets our circuit build timeout to the default.
* *
* Also resets the entire timeout history in this case and causes us * Also resets the entire timeout history in this case and causes us
* to restart the process of building test circuits and estimating a * to restart the process of building test circuits and estimating a
@ -1228,9 +1229,9 @@ circuit_build_times_count_timeout(circuit_build_times_t *cbt,
static int static int
circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt) circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
{ {
if (cbt->total_build_times < circuit_build_times_min_circs_to_observe()) { build_time_t max_time;
if (!circuit_build_times_enough_to_compute(cbt))
return 0; return 0;
}
if (!circuit_build_times_update_alpha(cbt)) if (!circuit_build_times_update_alpha(cbt))
return 0; return 0;
@ -1241,11 +1242,29 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
cbt->close_ms = circuit_build_times_calculate_timeout(cbt, cbt->close_ms = circuit_build_times_calculate_timeout(cbt,
circuit_build_times_close_quantile()); circuit_build_times_close_quantile());
max_time = circuit_build_times_max(cbt);
/* Sometimes really fast guard nodes give us such a steep curve /* Sometimes really fast guard nodes give us such a steep curve
* that this ends up being not that much greater than timeout_ms. * that this ends up being not that much greater than timeout_ms.
* Make it be at least 1 min to handle this case. */ * Make it be at least 1 min to handle this case. */
cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout()); cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
if (cbt->timeout_ms > max_time) {
log_notice(LD_CIRC,
"Circuit build timeout of %dms is beyond the maximum build "
"time we have ever observed. Capping it to %dms.",
(int)cbt->timeout_ms, max_time);
cbt->timeout_ms = max_time;
}
if (max_time < INT32_MAX/2 && cbt->close_ms > 2*max_time) {
log_notice(LD_CIRC,
"Circuit build measurement period of %dms is more than twice "
"the maximum build time we have ever observed. Capping it to "
"%dms.", (int)cbt->close_ms, 2*max_time);
cbt->close_ms = 2*max_time;
}
cbt->have_computed_timeout = 1; cbt->have_computed_timeout = 1;
return 1; return 1;
} }

View File

@ -79,6 +79,7 @@ void entries_retry_all(or_options_t *options);
void entry_guards_free_all(void); void entry_guards_free_all(void);
extern circuit_build_times_t circ_times; extern circuit_build_times_t circ_times;
int circuit_build_times_enough_to_compute(circuit_build_times_t *cbt);
void circuit_build_times_update_state(circuit_build_times_t *cbt, void circuit_build_times_update_state(circuit_build_times_t *cbt,
or_state_t *state); or_state_t *state);
int circuit_build_times_parse_state(circuit_build_times_t *cbt, int circuit_build_times_parse_state(circuit_build_times_t *cbt,

View File

@ -34,8 +34,6 @@ extern circuit_t *global_circuitlist; /* from circuitlist.c */
static void circuit_expire_old_circuits_clientside(time_t now); static void circuit_expire_old_circuits_clientside(time_t now);
static void circuit_increment_failure_count(void); static void circuit_increment_failure_count(void);
long int lround(double x);
/** Return 1 if <b>circ</b> could be returned by circuit_get_best(). /** Return 1 if <b>circ</b> could be returned by circuit_get_best().
* Else return 0. * Else return 0.
*/ */
@ -280,13 +278,14 @@ void
circuit_expire_building(time_t now) circuit_expire_building(time_t now)
{ {
circuit_t *victim, *next_circ = global_circuitlist; circuit_t *victim, *next_circ = global_circuitlist;
/* circ_times.timeout is BUILD_TIMEOUT_INITIAL_VALUE if we haven't /* circ_times.timeout_ms and circ_times.close_ms are from
* decided on a customized one yet */ * circuit_build_times_get_initial_timeout() if we haven't computed
time_t general_cutoff = now - lround(circ_times.timeout_ms/1000); * custom timeouts yet */
time_t begindir_cutoff = now - lround(circ_times.timeout_ms/2000); time_t general_cutoff = now - tor_lround(circ_times.timeout_ms/1000);
time_t fourhop_cutoff = now - lround(4*circ_times.timeout_ms/3000); time_t begindir_cutoff = now - tor_lround(circ_times.timeout_ms/2000);
time_t cannibalize_cutoff = now - lround(circ_times.timeout_ms/2000); time_t fourhop_cutoff = now - tor_lround(4*circ_times.timeout_ms/3000);
time_t close_cutoff = now - lround(circ_times.close_ms/1000); time_t cannibalize_cutoff = now - tor_lround(circ_times.timeout_ms/2000);
time_t close_cutoff = now - tor_lround(circ_times.close_ms/1000);
time_t introcirc_cutoff = begindir_cutoff; time_t introcirc_cutoff = begindir_cutoff;
cpath_build_state_t *build_state; cpath_build_state_t *build_state;
@ -385,7 +384,8 @@ circuit_expire_building(time_t now)
continue; continue;
} }
if (circuit_timeout_want_to_count_circ(TO_ORIGIN_CIRCUIT(victim))) { if (circuit_timeout_want_to_count_circ(TO_ORIGIN_CIRCUIT(victim)) &&
circuit_build_times_enough_to_compute(&circ_times)) {
/* Circuits are allowed to last longer for measurement. /* Circuits are allowed to last longer for measurement.
* Switch their purpose and wait. */ * Switch their purpose and wait. */
if (victim->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) { if (victim->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {

View File

@ -2967,26 +2967,6 @@ typedef uint32_t build_time_t;
/** Save state every 10 circuits */ /** Save state every 10 circuits */
#define CBT_SAVE_STATE_EVERY 10 #define CBT_SAVE_STATE_EVERY 10
/* Circuit Build Timeout network liveness constants */
/**
* Have we received a cell in the last N circ attempts?
*
* This tells us when to temporarily switch back to
* BUILD_TIMEOUT_INITIAL_VALUE until we start getting cells,
* at which point we switch back to computing the timeout from
* our saved history.
*/
#define CBT_NETWORK_NONLIVE_TIMEOUT_COUNT 3
/**
* This tells us when to toss out the last streak of N timeouts.
*
* If instead we start getting cells, we switch back to computing the timeout
* from our saved history.
*/
#define CBT_NETWORK_NONLIVE_DISCARD_COUNT (CBT_NETWORK_NONLIVE_TIMEOUT_COUNT*2)
/* Circuit build times consensus parameters */ /* Circuit build times consensus parameters */
/** /**
@ -3005,8 +2985,9 @@ typedef uint32_t build_time_t;
* Maximum count of timeouts that finish the first hop in the past * Maximum count of timeouts that finish the first hop in the past
* RECENT_CIRCUITS before calculating a new timeout. * RECENT_CIRCUITS before calculating a new timeout.
* *
* This tells us to abandon timeout history and set * This tells us whether to abandon timeout history and set
* the timeout back to BUILD_TIMEOUT_INITIAL_VALUE. * the timeout back to whatever circuit_build_times_get_initial_timeout()
* gives us.
*/ */
#define CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT (CBT_DEFAULT_RECENT_CIRCUITS*9/10) #define CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT (CBT_DEFAULT_RECENT_CIRCUITS*9/10)
@ -3027,9 +3008,7 @@ double circuit_build_times_quantile_cutoff(void);
#define CBT_DEFAULT_TIMEOUT_INITIAL_VALUE (60*1000) #define CBT_DEFAULT_TIMEOUT_INITIAL_VALUE (60*1000)
int32_t circuit_build_times_initial_timeout(void); int32_t circuit_build_times_initial_timeout(void);
#if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1 || \ #if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1
CBT_NETWORK_NONLIVE_DISCARD_COUNT < 1 || \
CBT_NETWORK_NONLIVE_TIMEOUT_COUNT < 1
#error "RECENT_CIRCUITS is set too low." #error "RECENT_CIRCUITS is set too low."
#endif #endif
@ -3039,8 +3018,6 @@ typedef struct {
time_t network_last_live; time_t network_last_live;
/** If the network is not live, how many timeouts has this caused? */ /** If the network is not live, how many timeouts has this caused? */
int nonlive_timeouts; int nonlive_timeouts;
/** If the network is not live, have we yet discarded our history? */
int nonlive_discarded;
/** Circular array of circuits that have made it to the first hop. Slot is /** Circular array of circuits that have made it to the first hop. Slot is
* 1 if circuit timed out, 0 if circuit succeeded */ * 1 if circuit timed out, 0 if circuit succeeded */
int8_t *timeouts_after_firsthop; int8_t *timeouts_after_firsthop;
@ -3048,12 +3025,6 @@ typedef struct {
int num_recent_circs; int num_recent_circs;
/** Index into circular array. */ /** Index into circular array. */
int after_firsthop_idx; int after_firsthop_idx;
/** Timeout gathering is suspended if non-zero. The old timeout value
* is stored here in that case. */
double suspended_timeout;
/** Timeout gathering is suspended if non-zero. The old close value
* is stored here in that case. */
double suspended_close_timeout;
} network_liveness_t; } network_liveness_t;
/** Structure for circuit build times history */ /** Structure for circuit build times history */

View File

@ -499,7 +499,7 @@ test_circuit_timeout(void)
build_times_idx = estimate.build_times_idx; build_times_idx = estimate.build_times_idx;
total_build_times = estimate.total_build_times; total_build_times = estimate.total_build_times;
for (i = 0; i < CBT_NETWORK_NONLIVE_TIMEOUT_COUNT; i++) {
test_assert(circuit_build_times_network_check_live(&estimate)); test_assert(circuit_build_times_network_check_live(&estimate));
test_assert(circuit_build_times_network_check_live(&final)); test_assert(circuit_build_times_network_check_live(&final));
@ -507,20 +507,6 @@ test_circuit_timeout(void)
(time_t)(approx_time()-estimate.close_ms/1000.0-1)); (time_t)(approx_time()-estimate.close_ms/1000.0-1));
circuit_build_times_count_close(&final, 0, circuit_build_times_count_close(&final, 0,
(time_t)(approx_time()-final.close_ms/1000.0-1)); (time_t)(approx_time()-final.close_ms/1000.0-1));
}
test_assert(!circuit_build_times_network_check_live(&estimate));
test_assert(!circuit_build_times_network_check_live(&final));
for ( ; i < CBT_NETWORK_NONLIVE_DISCARD_COUNT; i++) {
circuit_build_times_count_close(&estimate, 0,
(time_t)(approx_time()-estimate.close_ms/1000.0-1));
if (i < CBT_NETWORK_NONLIVE_DISCARD_COUNT-1) {
circuit_build_times_count_close(&final, 0,
(time_t)(approx_time()-final.close_ms/1000.0-1));
}
}
test_assert(!circuit_build_times_network_check_live(&estimate)); test_assert(!circuit_build_times_network_check_live(&estimate));
test_assert(!circuit_build_times_network_check_live(&final)); test_assert(!circuit_build_times_network_check_live(&final));