Bug #23114: Time out circuits immediately.

This changes the purpose of circuits that are past the timeout to measurement
*as they are built*, ensuring accurate application of the timeout logic.
This commit is contained in:
Mike Perry 2017-12-07 00:04:09 +00:00
parent b5d4cd1b41
commit ed89588c4f
6 changed files with 145 additions and 36 deletions

6
changes/bug23114 Normal file
View File

@ -0,0 +1,6 @@
o Minor bugfixes (Performance):
- Consider circuits for timeout as soon as they complete a hop. This
is more accurate than applying the timeout in circuit_expire_building()
because that function is only called once per second, which is now
too slow for typical timeouts on the current network. Fixes bug 23114;
bugfix on 0.2.2.2-alpha.

View File

@ -109,6 +109,14 @@ static void cpath_ref_decref(crypt_path_reference_t *cpath_ref);
static void circuit_about_to_free_atexit(circuit_t *circ);
static void circuit_about_to_free(circuit_t *circ);
/**
* A cached value of the current state of the origin circuit list. Has the
* value 1 if we saw any opened circuits recently (since the last call to
* circuit_any_opened_circuits(), which gets called around once a second by
* circuit_expire_building). 0 otherwise.
*/
static int any_opened_circs_cached_val = 0;
/********* END VARIABLES ************/
/** A map from channel and circuit ID to circuit. (Lookup performance is
@ -597,6 +605,56 @@ circuit_get_global_origin_circuit_list(void)
return global_origin_circuit_list;
}
/**
* Return true if we have any opened general-purpose 3 hop
* origin circuits.
*
* The result from this function is cached for use by
* circuit_any_opened_circuits_cached().
*/
int
circuit_any_opened_circuits(void)
{
SMARTLIST_FOREACH_BEGIN(circuit_get_global_origin_circuit_list(),
const origin_circuit_t *, next_circ) {
if (!TO_CIRCUIT(next_circ)->marked_for_close &&
next_circ->has_opened &&
TO_CIRCUIT(next_circ)->state == CIRCUIT_STATE_OPEN &&
TO_CIRCUIT(next_circ)->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT &&
next_circ->build_state &&
next_circ->build_state->desired_path_len == DEFAULT_ROUTE_LEN) {
circuit_cache_opened_circuit_state(1);
return 1;
}
} SMARTLIST_FOREACH_END(next_circ);
circuit_cache_opened_circuit_state(0);
return 0;
}
/**
* Cache the "any circuits opened" state, as specified in param
* circuits_are_opened. This is a helper function to update
* the circuit opened status whenever we happen to look at the
* circuit list.
*/
void
circuit_cache_opened_circuit_state(int circuits_are_opened)
{
any_opened_circs_cached_val = circuits_are_opened;
}
/**
* Return true if there were any opened circuits since the last call to
* circuit_any_opened_circuits(), or since circuit_expire_building() last
* ran (it runs roughly once per second).
*/
int
circuit_any_opened_circuits_cached(void)
{
return any_opened_circs_cached_val;
}
/** Function to make circ-\>state human-readable */
const char *
circuit_state_to_string(int state)
@ -1792,7 +1850,7 @@ circuit_get_cpath_len(origin_circuit_t *circ)
/** Return the number of opened hops in circuit's path.
* If circ has no entries, or is NULL, returns 0. */
int
circuit_get_cpath_opened_len(origin_circuit_t *circ)
circuit_get_cpath_opened_len(const origin_circuit_t *circ)
{
int n = 0;
if (circ && circ->cpath) {

View File

@ -17,6 +17,10 @@
MOCK_DECL(smartlist_t *, circuit_get_global_list, (void));
smartlist_t *circuit_get_global_origin_circuit_list(void);
int circuit_any_opened_circuits(void);
int circuit_any_opened_circuits_cached(void);
void circuit_cache_opened_circuit_state(int circuits_are_opened);
const char *circuit_state_to_string(int state);
const char *circuit_purpose_to_controller_string(uint8_t purpose);
const char *circuit_purpose_to_controller_hs_state_string(uint8_t purpose);
@ -58,7 +62,7 @@ void circuit_mark_all_dirty_circs_as_unusable(void);
MOCK_DECL(void, circuit_mark_for_close_, (circuit_t *circ, int reason,
int line, const char *file));
int circuit_get_cpath_len(origin_circuit_t *circ);
int circuit_get_cpath_opened_len(origin_circuit_t *);
int circuit_get_cpath_opened_len(const origin_circuit_t *);
void circuit_clear_cpath(origin_circuit_t *circ);
crypt_path_t *circuit_get_cpath_hop(origin_circuit_t *circ, int hopnum);
void circuit_get_all_pending_on_channel(smartlist_t *out,

View File

@ -37,6 +37,7 @@
#include "rendservice.h"
#include "statefile.h"
#include "circuitlist.h"
#include "circuituse.h"
#undef log
#include <math.h>
@ -611,6 +612,35 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
}
#endif /* 0 */
/**
* Mark this circuit as timed out, but change its purpose
* so that it continues to build, allowing us to measure
* its full build time.
*/
void
circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ)
{
control_event_circuit_status(circ,
CIRC_EVENT_FAILED,
END_CIRC_REASON_TIMEOUT);
circuit_change_purpose(TO_CIRCUIT(circ),
CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT);
/* Record this event to check for too many timeouts
* in a row. This function does not record a time value yet
* (we do that later); it only counts the fact that we did
* have a timeout. We also want to avoid double-counting
* already "relaxed" circuits, which are counted in
* circuit_expire_building(). */
if (!circ->relaxed_timeout) {
int first_hop_succeeded = circ->cpath &&
circ->cpath->state == CPATH_STATE_OPEN;
circuit_build_times_count_timeout(
get_circuit_build_times_mutable(),
first_hop_succeeded);
}
}
/**
* Perform the build time work that needs to be done when a circuit
* completes a hop.
@ -649,12 +679,29 @@ circuit_build_times_handle_completed_hop(origin_circuit_t *circ)
return;
}
tor_gettimeofday(&end);
timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
/* Check if we would have timed out already. If so, change the
* purpose here. But don't do any timeout handling here if there
* are no circuits opened yet. Save it for circuit_expire_building()
* (to allow it to handle timeout "relaxing" over there). */
if (timediff > get_circuit_build_timeout_ms() &&
circuit_any_opened_circuits_cached()) {
/* Circuits are allowed to last longer for measurement.
* Switch their purpose and wait. */
if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
log_info(LD_CIRC,
"Deciding to timeout circuit "U64_FORMAT"\n",
U64_PRINTF_ARG(circ->global_identifier));
circuit_build_times_mark_circ_as_measurement_only(circ);
}
}
/* If the circuit is built to exactly the DEFAULT_ROUTE_LEN,
* add it to our buildtimes. */
if (circuit_get_cpath_opened_len(circ) == DEFAULT_ROUTE_LEN) {
tor_gettimeofday(&end);
timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
/* If the circuit build time is much greater than we would have cut
* it off at, we probably had a suspend event along this codepath,
* and we should discard the value.
@ -668,7 +715,7 @@ circuit_build_times_handle_completed_hop(origin_circuit_t *circ)
} else {
/* Only count circuit times if the network is live */
if (circuit_build_times_network_check_live(
get_circuit_build_times())) {
get_circuit_build_times())) {
circuit_build_times_add_time(get_circuit_build_times_mutable(),
(build_time_t)timediff);
circuit_build_times_set_timeout(get_circuit_build_times_mutable());

View File

@ -45,6 +45,7 @@ double circuit_build_times_timeout_rate(const circuit_build_times_t *cbt);
double circuit_build_times_close_rate(const circuit_build_times_t *cbt);
void circuit_build_times_update_last_circ(circuit_build_times_t *cbt);
void circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ);
#ifdef CIRCUITSTATS_PRIVATE
STATIC double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt,

View File

@ -412,8 +412,17 @@ circuit_conforms_to_options(const origin_circuit_t *circ,
}
#endif /* 0 */
/** Close all circuits that start at us, aren't open, and were born
/**
* Close all circuits that start at us, aren't open, and were born
* at least CircuitBuildTimeout seconds ago.
*
* TODO: This function is now partially redundant to
* circuit_build_times_handle_completed_hop(), but that function only
* covers circuits up to and including 3 hops that are still actually
* completing hops. However, circuit_expire_building() also handles longer
* circuits, as well as circuits that are completely stalled.
* In the future (after prop247/other path selection revamping), we probably
* want to eliminate this rats nest in favor of a simpler approach.
*/
void
circuit_expire_building(void)
@ -435,21 +444,7 @@ circuit_expire_building(void)
* we want to be more lenient with timeouts, in case the
* user has relocated and/or changed network connections.
* See bug #3443. */
SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, next_circ) {
if (!CIRCUIT_IS_ORIGIN(next_circ) || /* didn't originate here */
next_circ->marked_for_close) { /* don't mess with marked circs */
continue;
}
if (TO_ORIGIN_CIRCUIT(next_circ)->has_opened &&
next_circ->state == CIRCUIT_STATE_OPEN &&
TO_ORIGIN_CIRCUIT(next_circ)->build_state &&
TO_ORIGIN_CIRCUIT(next_circ)->build_state->desired_path_len
== DEFAULT_ROUTE_LEN) {
any_opened_circs = 1;
break;
}
} SMARTLIST_FOREACH_END(next_circ);
any_opened_circs = circuit_any_opened_circuits();
#define SET_CUTOFF(target, msec) do { \
long ms = tor_lround(msec); \
@ -493,6 +488,10 @@ circuit_expire_building(void)
SET_CUTOFF(general_cutoff, get_circuit_build_timeout_ms());
SET_CUTOFF(begindir_cutoff, get_circuit_build_timeout_ms());
// TODO: We should probably use route_len_for_purpose() here instead,
// except that does not count the extra round trip for things like server
// intros and rends.
/* > 3hop circs seem to have a 1.0 second delay on their cannibalized
* 4th hop. */
SET_CUTOFF(fourhop_cutoff, get_circuit_build_timeout_ms() * (10/6.0) + 1000);
@ -694,23 +693,17 @@ circuit_expire_building(void)
if (circuit_timeout_want_to_count_circ(TO_ORIGIN_CIRCUIT(victim)) &&
circuit_build_times_enough_to_compute(get_circuit_build_times())) {
log_info(LD_CIRC,
"Deciding to count the timeout for circuit "U64_FORMAT"\n",
U64_PRINTF_ARG(
TO_ORIGIN_CIRCUIT(victim)->global_identifier));
/* Circuits are allowed to last longer for measurement.
* Switch their purpose and wait. */
if (victim->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
control_event_circuit_status(TO_ORIGIN_CIRCUIT(victim),
CIRC_EVENT_FAILED,
END_CIRC_REASON_TIMEOUT);
circuit_change_purpose(victim, CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT);
/* Record this failure to check for too many timeouts
* in a row. This function does not record a time value yet
* (we do that later); it only counts the fact that we did
* have a timeout. We also want to avoid double-counting
* already "relaxed" circuits, which are counted above. */
if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) {
circuit_build_times_count_timeout(
get_circuit_build_times_mutable(),
first_hop_succeeded);
}
circuit_build_times_mark_circ_as_measurement_only(TO_ORIGIN_CIRCUIT(
victim));
continue;
}