From 937260af6a88680616aac578f3def329c7d9e3fe Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Sun, 1 Apr 2018 03:52:34 +0000 Subject: [PATCH] Bug 25705: Don't count circuit path failures as build failures. Also emit a rate limited log message when they happen, since they are likely correlated with other issues. --- changes/bug25705 | 5 +++++ src/or/circuituse.c | 45 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 11 deletions(-) create mode 100644 changes/bug25705 diff --git a/changes/bug25705 b/changes/bug25705 new file mode 100644 index 0000000000..360d96d4c6 --- /dev/null +++ b/changes/bug25705 @@ -0,0 +1,5 @@ + o Minor bugfixes (circuit path selection): + - Don't count path selection failures as circuit build failures. This + should eliminate cases where Tor blames its guard or the network + for situations like insufficient microdescriptors and/or overly + restrictive torrc settings. Fixes bug 25705; bugfix on 0.3.3.1-alpha. diff --git a/src/or/circuituse.c b/src/or/circuituse.c index 3125fff650..93b7b2023f 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -1757,6 +1757,39 @@ circuit_build_failed(origin_circuit_t *circ) * the last hop or an earlier hop. then use this info below. */ int failed_at_last_hop = 0; + + /* First, check to see if this was a path failure, rather than build + * failure. + * + * Note that we deliberately use circuit_get_cpath_len() (and not + * circuit_get_cpath_opened_len()) because we only want to ensure + * that a full path is *chosen*. This is different than a full path + * being *built*. We only want to count *build* failures below. + * + * Path selection failures can happen spuriously for a number + * of reasons (such as aggressive/invalid user-specified path + * restrictions in the torrc, insufficient microdescriptors, and + * non-user reasons like exitpolicy issues), and so should not be + * counted as failures below. + */ + if (circuit_get_cpath_len(circ) < circ->build_state->desired_path_len) { + static ratelim_t pathfail_limit = RATELIM_INIT(3600); + log_fn_ratelim(&pathfail_limit, LOG_NOTICE, LD_CIRC, + "Our circuit %u (id: %" PRIu32 ") died due to an invalid " + "selected path, purpose %s. This may be a torrc " + "configuration issue, or a bug.", + TO_CIRCUIT(circ)->n_circ_id, circ->global_identifier, + circuit_purpose_to_string(TO_CIRCUIT(circ)->purpose)); + + /* If the path failed on an RP, retry it. */ + if (TO_CIRCUIT(circ)->purpose == CIRCUIT_PURPOSE_S_CONNECT_REND) + hs_circ_retry_service_rendezvous_point(circ); + + /* In all other cases, just bail. The rest is just failure accounting + * that we don't want to do */ + return; + } + /* If the last hop isn't open, and the second-to-last is, we failed * at the last hop. */ if (circ->cpath && @@ -1806,18 +1839,8 @@ circuit_build_failed(origin_circuit_t *circ) * If we have guard state (new guard API) and our path selection * code actually chose a full path, then blame the failure of this * circuit on the guard. - * - * Note that we deliberately use circuit_get_cpath_len() (and not - * circuit_get_cpath_opened_len()) because we only want to ensure - * that a full path is *chosen*. This is different than a full path - * being *built*. We only want to blame *build* failures on this - * guard. Path selection failures can happen spuriously for a number - * of reasons (such as aggressive/invalid user-specified path - * restrictions in the torrc, as well as non-user reasons like - * exitpolicy issues), and so should not be counted here. */ - if (circ->guard_state && - circuit_get_cpath_len(circ) >= circ->build_state->desired_path_len) + if (circ->guard_state) entry_guard_failed(&circ->guard_state); /* if there are any one-hop streams waiting on this circuit, fail * them now so they can retry elsewhere. */