diff --git a/changes/bug1772 b/changes/bug1772
new file mode 100644
index 0000000000..9a4f809ee2
--- /dev/null
+++ b/changes/bug1772
@@ -0,0 +1,11 @@
+ o Minor bugfixes:
+ - Simplify the logic that causes us to decide if the network is unavailable
+ for purposes of recording circuit build times. If we receive no cells
+ whatsoever for the entire duration of a circuit's full measured lifetime,
+ the network is probably down. This should hopefully reduce some of the
+ cases where we see ridiculous circuit build timeouts for people with spotty
+ wireless connections. Fixes bug 1772; bugfix on 0.2.2.2-alpha.
+ - Prevent the circuit build timeout from becoming larger than the maximum
+ build time we have ever seen. Also, prevent the measurement time period
+ from becoming larger than twice that value. Fixes bug 1772; bugfix on
+ 0.2.2.2-alpha
diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c
index fde2e7f494..473b28e872 100644
--- a/src/or/circuitbuild.c
+++ b/src/or/circuitbuild.c
@@ -149,6 +149,14 @@ circuit_build_times_min_circs_to_observe(void)
return num;
}
+/** Return true iff cbt has recorded enough build times that we
+ * want to start acting on the timeout it implies. */
+int
+circuit_build_times_enough_to_compute(circuit_build_times_t *cbt)
+{
+ return cbt->total_build_times >= circuit_build_times_min_circs_to_observe();
+}
+
double
circuit_build_times_quantile_cutoff(void)
{
@@ -292,8 +300,8 @@ circuit_build_times_reset(circuit_build_times_t *cbt)
/**
* Initialize the buildtimes structure for first use.
*
- * Sets the initial timeout value based to either the
- * config setting or BUILD_TIMEOUT_INITIAL_VALUE.
+ * Sets the initial timeout values based on either the config setting,
+ * the consensus param, or the default (CBT_DEFAULT_TIMEOUT_INITIAL_VALUE).
*/
void
circuit_build_times_init(circuit_build_times_t *cbt)
@@ -306,6 +314,7 @@ circuit_build_times_init(circuit_build_times_t *cbt)
control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
}
+#if 0
/**
* Rewind our build time history by n positions.
*/
@@ -332,6 +341,7 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
"Rewound history by %d places. Current index: %d. "
"Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
}
+#endif
/**
* Add a new build time value time to the set of build times. Time
@@ -916,9 +926,7 @@ int
circuit_build_times_needs_circuits(circuit_build_times_t *cbt)
{
/* Return true if < MIN_CIRCUITS_TO_OBSERVE */
- if (cbt->total_build_times < circuit_build_times_min_circs_to_observe())
- return 1;
- return 0;
+ return !circuit_build_times_enough_to_compute(cbt);
}
/**
@@ -933,7 +941,11 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
}
/**
- * Called to indicate that the network showed some signs of liveness.
+ * Called to indicate that the network showed some signs of liveness,
+ * i.e. we received a cell.
+ *
+ * This is used by circuit_build_times_network_check_live() to decide
+ * if we should record the circuit build timeout or not.
*
* This function is called every time we receive a cell. Avoid
* syscalls, events, and other high-intensity work.
@@ -941,14 +953,26 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
void
circuit_build_times_network_is_live(circuit_build_times_t *cbt)
{
- cbt->liveness.network_last_live = approx_time();
- cbt->liveness.nonlive_discarded = 0;
+ time_t now = approx_time();
+ if (cbt->liveness.nonlive_timeouts > 0) {
+ log_notice(LD_CIRC,
+ "Tor now sees network activity. Restoring circuit build "
+ "timeout recording. Network was down for %d seconds "
+ "during %d circuit attempts.",
+ (int)(now - cbt->liveness.network_last_live),
+ cbt->liveness.nonlive_timeouts);
+ }
+ cbt->liveness.network_last_live = now;
cbt->liveness.nonlive_timeouts = 0;
}
/**
* Called to indicate that we completed a circuit. Because this circuit
* succeeded, it doesn't count as a timeout-after-the-first-hop.
+ *
+ * This is used by circuit_build_times_network_check_changed() to determine
+ * if we had too many recent timeouts and need to reset our learned timeout
+ * to something higher.
*/
void
circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
@@ -961,6 +985,10 @@ circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
/**
* A circuit just timed out. If it failed after the first hop, record it
* in our history for later deciding if the network speed has changed.
+ *
+ * This is used by circuit_build_times_network_check_changed() to determine
+ * if we had too many recent timeouts and need to reset our learned timeout
+ * to something higher.
*/
static void
circuit_build_times_network_timeout(circuit_build_times_t *cbt,
@@ -977,6 +1005,9 @@ circuit_build_times_network_timeout(circuit_build_times_t *cbt,
* A circuit was just forcibly closed. If there has been no recent network
* activity at all, but this circuit was launched back when we thought the
* network was live, increment the number of "nonlive" circuit timeouts.
+ *
+ * This is used by circuit_build_times_network_check_live() to decide
+ * if we should record the circuit build timeout or not.
*/
static void
circuit_build_times_network_close(circuit_build_times_t *cbt,
@@ -986,15 +1017,8 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
/*
* Check if this is a timeout that was for a circuit that spent its
* entire existence during a time where we have had no network activity.
- *
- * Also double check that it is a valid timeout after we have possibly
- * just recently reset cbt->close_ms.
- *
- * We use close_ms here because timeouts aren't actually counted as timeouts
- * until close_ms elapses.
*/
- if (cbt->liveness.network_last_live <= start_time &&
- start_time <= (now - cbt->close_ms/1000.0)) {
+ if (cbt->liveness.network_last_live < start_time) {
if (did_onehop) {
char last_live_buf[ISO_TIME_LEN+1];
char start_time_buf[ISO_TIME_LEN+1];
@@ -1009,12 +1033,25 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
now_buf);
}
cbt->liveness.nonlive_timeouts++;
+ if (cbt->liveness.nonlive_timeouts == 1) {
+ log_notice(LD_CIRC,
+ "Tor has not observed any network activity for the past %d "
+ "seconds. Disabling circuit build timeout code.",
+ (int)(now - cbt->liveness.network_last_live));
+ } else {
+ log_info(LD_CIRC,
+ "Got non-live timeout. Current count is: %d",
+ cbt->liveness.nonlive_timeouts);
+ }
}
}
/**
- * Returns false if the network has not received a cell or tls handshake
- * in the past NETWORK_NOTLIVE_TIMEOUT_COUNT circuits.
+ * When the network is not live, we do not record circuit build times.
+ *
+ * The network is considered not live if there has been at least one
+ * circuit build that began and ended (had its close_ms measurement
+ * period expire) since we last received a cell.
*
* Also has the side effect of rewinding the circuit time history
* in the case of recent liveness changes.
@@ -1022,45 +1059,8 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
int
circuit_build_times_network_check_live(circuit_build_times_t *cbt)
{
- time_t now = approx_time();
- if (cbt->liveness.nonlive_timeouts >= CBT_NETWORK_NONLIVE_DISCARD_COUNT) {
- if (!cbt->liveness.nonlive_discarded) {
- cbt->liveness.nonlive_discarded = 1;
- log_notice(LD_CIRC, "Network is no longer live (too many recent "
- "circuit timeouts). Dead for %ld seconds.",
- (long int)(now - cbt->liveness.network_last_live));
- /* Only discard NETWORK_NONLIVE_TIMEOUT_COUNT-1 because we stopped
- * counting after that */
- circuit_build_times_rewind_history(cbt,
- CBT_NETWORK_NONLIVE_TIMEOUT_COUNT-1);
- control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_DISCARD);
- }
+ if (cbt->liveness.nonlive_timeouts > 0) {
return 0;
- } else if (cbt->liveness.nonlive_timeouts >=
- CBT_NETWORK_NONLIVE_TIMEOUT_COUNT) {
- if (cbt->timeout_ms < circuit_build_times_get_initial_timeout()) {
- log_notice(LD_CIRC,
- "Network is flaky. No activity for %ld seconds. "
- "Temporarily raising timeout to %lds.",
- (long int)(now - cbt->liveness.network_last_live),
- tor_lround(circuit_build_times_get_initial_timeout()/1000));
- cbt->liveness.suspended_timeout = cbt->timeout_ms;
- cbt->liveness.suspended_close_timeout = cbt->close_ms;
- cbt->close_ms = cbt->timeout_ms
- = circuit_build_times_get_initial_timeout();
- control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_SUSPENDED);
- }
-
- return 0;
- } else if (cbt->liveness.suspended_timeout > 0) {
- log_notice(LD_CIRC,
- "Network activity has resumed. "
- "Resuming circuit timeout calculations.");
- cbt->timeout_ms = cbt->liveness.suspended_timeout;
- cbt->close_ms = cbt->liveness.suspended_close_timeout;
- cbt->liveness.suspended_timeout = 0;
- cbt->liveness.suspended_close_timeout = 0;
- control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESUME);
}
return 1;
@@ -1069,7 +1069,8 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt)
/**
* Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of
* the past RECENT_CIRCUITS time out after the first hop. Used to detect
- * if the network connection has changed significantly.
+ * if the network connection has changed significantly, and if so,
+ * resets our circuit build timeout to the default.
*
* Also resets the entire timeout history in this case and causes us
* to restart the process of building test circuits and estimating a
@@ -1228,9 +1229,9 @@ circuit_build_times_count_timeout(circuit_build_times_t *cbt,
static int
circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
{
- if (cbt->total_build_times < circuit_build_times_min_circs_to_observe()) {
+ build_time_t max_time;
+ if (!circuit_build_times_enough_to_compute(cbt))
return 0;
- }
if (!circuit_build_times_update_alpha(cbt))
return 0;
@@ -1241,11 +1242,29 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
cbt->close_ms = circuit_build_times_calculate_timeout(cbt,
circuit_build_times_close_quantile());
+ max_time = circuit_build_times_max(cbt);
+
/* Sometimes really fast guard nodes give us such a steep curve
* that this ends up being not that much greater than timeout_ms.
* Make it be at least 1 min to handle this case. */
cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
+ if (cbt->timeout_ms > max_time) {
+ log_notice(LD_CIRC,
+ "Circuit build timeout of %dms is beyond the maximum build "
+ "time we have ever observed. Capping it to %dms.",
+ (int)cbt->timeout_ms, max_time);
+ cbt->timeout_ms = max_time;
+ }
+
+ if (max_time < INT32_MAX/2 && cbt->close_ms > 2*max_time) {
+ log_notice(LD_CIRC,
+ "Circuit build measurement period of %dms is more than twice "
+ "the maximum build time we have ever observed. Capping it to "
+ "%dms.", (int)cbt->close_ms, 2*max_time);
+ cbt->close_ms = 2*max_time;
+ }
+
cbt->have_computed_timeout = 1;
return 1;
}
diff --git a/src/or/circuitbuild.h b/src/or/circuitbuild.h
index 9a8e4c3879..888bf9d255 100644
--- a/src/or/circuitbuild.h
+++ b/src/or/circuitbuild.h
@@ -79,6 +79,7 @@ void entries_retry_all(or_options_t *options);
void entry_guards_free_all(void);
extern circuit_build_times_t circ_times;
+int circuit_build_times_enough_to_compute(circuit_build_times_t *cbt);
void circuit_build_times_update_state(circuit_build_times_t *cbt,
or_state_t *state);
int circuit_build_times_parse_state(circuit_build_times_t *cbt,
diff --git a/src/or/circuituse.c b/src/or/circuituse.c
index 1fbe5a82b9..3af0fb642d 100644
--- a/src/or/circuituse.c
+++ b/src/or/circuituse.c
@@ -34,8 +34,6 @@ extern circuit_t *global_circuitlist; /* from circuitlist.c */
static void circuit_expire_old_circuits_clientside(time_t now);
static void circuit_increment_failure_count(void);
-long int lround(double x);
-
/** Return 1 if circ could be returned by circuit_get_best().
* Else return 0.
*/
@@ -280,13 +278,14 @@ void
circuit_expire_building(time_t now)
{
circuit_t *victim, *next_circ = global_circuitlist;
- /* circ_times.timeout is BUILD_TIMEOUT_INITIAL_VALUE if we haven't
- * decided on a customized one yet */
- time_t general_cutoff = now - lround(circ_times.timeout_ms/1000);
- time_t begindir_cutoff = now - lround(circ_times.timeout_ms/2000);
- time_t fourhop_cutoff = now - lround(4*circ_times.timeout_ms/3000);
- time_t cannibalize_cutoff = now - lround(circ_times.timeout_ms/2000);
- time_t close_cutoff = now - lround(circ_times.close_ms/1000);
+ /* circ_times.timeout_ms and circ_times.close_ms are from
+ * circuit_build_times_get_initial_timeout() if we haven't computed
+ * custom timeouts yet */
+ time_t general_cutoff = now - tor_lround(circ_times.timeout_ms/1000);
+ time_t begindir_cutoff = now - tor_lround(circ_times.timeout_ms/2000);
+ time_t fourhop_cutoff = now - tor_lround(4*circ_times.timeout_ms/3000);
+ time_t cannibalize_cutoff = now - tor_lround(circ_times.timeout_ms/2000);
+ time_t close_cutoff = now - tor_lround(circ_times.close_ms/1000);
time_t introcirc_cutoff = begindir_cutoff;
cpath_build_state_t *build_state;
@@ -385,7 +384,8 @@ circuit_expire_building(time_t now)
continue;
}
- if (circuit_timeout_want_to_count_circ(TO_ORIGIN_CIRCUIT(victim))) {
+ if (circuit_timeout_want_to_count_circ(TO_ORIGIN_CIRCUIT(victim)) &&
+ circuit_build_times_enough_to_compute(&circ_times)) {
/* Circuits are allowed to last longer for measurement.
* Switch their purpose and wait. */
if (victim->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
diff --git a/src/or/or.h b/src/or/or.h
index 7aee493b39..9f93f112b2 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -2967,26 +2967,6 @@ typedef uint32_t build_time_t;
/** Save state every 10 circuits */
#define CBT_SAVE_STATE_EVERY 10
-/* Circuit Build Timeout network liveness constants */
-
-/**
- * Have we received a cell in the last N circ attempts?
- *
- * This tells us when to temporarily switch back to
- * BUILD_TIMEOUT_INITIAL_VALUE until we start getting cells,
- * at which point we switch back to computing the timeout from
- * our saved history.
- */
-#define CBT_NETWORK_NONLIVE_TIMEOUT_COUNT 3
-
-/**
- * This tells us when to toss out the last streak of N timeouts.
- *
- * If instead we start getting cells, we switch back to computing the timeout
- * from our saved history.
- */
-#define CBT_NETWORK_NONLIVE_DISCARD_COUNT (CBT_NETWORK_NONLIVE_TIMEOUT_COUNT*2)
-
/* Circuit build times consensus parameters */
/**
@@ -3005,8 +2985,9 @@ typedef uint32_t build_time_t;
* Maximum count of timeouts that finish the first hop in the past
* RECENT_CIRCUITS before calculating a new timeout.
*
- * This tells us to abandon timeout history and set
- * the timeout back to BUILD_TIMEOUT_INITIAL_VALUE.
+ * This tells us whether to abandon timeout history and set
+ * the timeout back to whatever circuit_build_times_get_initial_timeout()
+ * gives us.
*/
#define CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT (CBT_DEFAULT_RECENT_CIRCUITS*9/10)
@@ -3027,9 +3008,7 @@ double circuit_build_times_quantile_cutoff(void);
#define CBT_DEFAULT_TIMEOUT_INITIAL_VALUE (60*1000)
int32_t circuit_build_times_initial_timeout(void);
-#if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1 || \
- CBT_NETWORK_NONLIVE_DISCARD_COUNT < 1 || \
- CBT_NETWORK_NONLIVE_TIMEOUT_COUNT < 1
+#if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1
#error "RECENT_CIRCUITS is set too low."
#endif
@@ -3039,8 +3018,6 @@ typedef struct {
time_t network_last_live;
/** If the network is not live, how many timeouts has this caused? */
int nonlive_timeouts;
- /** If the network is not live, have we yet discarded our history? */
- int nonlive_discarded;
/** Circular array of circuits that have made it to the first hop. Slot is
* 1 if circuit timed out, 0 if circuit succeeded */
int8_t *timeouts_after_firsthop;
@@ -3048,12 +3025,6 @@ typedef struct {
int num_recent_circs;
/** Index into circular array. */
int after_firsthop_idx;
- /** Timeout gathering is suspended if non-zero. The old timeout value
- * is stored here in that case. */
- double suspended_timeout;
- /** Timeout gathering is suspended if non-zero. The old close value
- * is stored here in that case. */
- double suspended_close_timeout;
} network_liveness_t;
/** Structure for circuit build times history */
diff --git a/src/test/test.c b/src/test/test.c
index d9528328b8..8d8c46fca2 100644
--- a/src/test/test.c
+++ b/src/test/test.c
@@ -499,28 +499,14 @@ test_circuit_timeout(void)
build_times_idx = estimate.build_times_idx;
total_build_times = estimate.total_build_times;
- for (i = 0; i < CBT_NETWORK_NONLIVE_TIMEOUT_COUNT; i++) {
- test_assert(circuit_build_times_network_check_live(&estimate));
- test_assert(circuit_build_times_network_check_live(&final));
- circuit_build_times_count_close(&estimate, 0,
- (time_t)(approx_time()-estimate.close_ms/1000.0-1));
- circuit_build_times_count_close(&final, 0,
- (time_t)(approx_time()-final.close_ms/1000.0-1));
- }
+ test_assert(circuit_build_times_network_check_live(&estimate));
+ test_assert(circuit_build_times_network_check_live(&final));
- test_assert(!circuit_build_times_network_check_live(&estimate));
- test_assert(!circuit_build_times_network_check_live(&final));
-
- for ( ; i < CBT_NETWORK_NONLIVE_DISCARD_COUNT; i++) {
- circuit_build_times_count_close(&estimate, 0,
- (time_t)(approx_time()-estimate.close_ms/1000.0-1));
-
- if (i < CBT_NETWORK_NONLIVE_DISCARD_COUNT-1) {
- circuit_build_times_count_close(&final, 0,
- (time_t)(approx_time()-final.close_ms/1000.0-1));
- }
- }
+ circuit_build_times_count_close(&estimate, 0,
+ (time_t)(approx_time()-estimate.close_ms/1000.0-1));
+ circuit_build_times_count_close(&final, 0,
+ (time_t)(approx_time()-final.close_ms/1000.0-1));
test_assert(!circuit_build_times_network_check_live(&estimate));
test_assert(!circuit_build_times_network_check_live(&final));