Start noticing and reporting bootstrapping failures too. It looks like
we never bothered learning why OR conns fail, so the next step is to add
some infrastructure for that.


svn:r15091
This commit is contained in:
Roger Dingledine 2008-06-09 18:32:43 +00:00
parent dba6d8c55a
commit baa3cea213
6 changed files with 95 additions and 33 deletions

View File

@ -165,7 +165,7 @@ Status: Open
tag=circuit_create "Establishing circuits"
Once we've finished our TLS handshake with an entry guard, we will
set about rying to make some 3-hop circuits in case we need them soon.
set about trying to make some 3-hop circuits in case we need them soon.
Phase 100:
tag=done summary="Done"

View File

@ -497,9 +497,18 @@ connection_about_to_close_connection(connection_t *conn)
if (connection_or_nonopen_was_started_here(or_conn)) {
rep_hist_note_connect_failed(or_conn->identity_digest, now);
entry_guard_register_connect_status(or_conn->identity_digest,0,now);
router_set_status(or_conn->identity_digest, 0);
control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
control_tls_error_to_reason(or_conn->tls_error));
if (!get_options()->HttpsProxy)
router_set_status(or_conn->identity_digest, 0);
if (conn->state == OR_CONN_STATE_CONNECTING) {
control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED, 0);
control_event_bootstrap_problem(
tor_socket_strerror(or_conn->socket_error), 0);
} else {
int reason = control_tls_error_to_reason(or_conn->tls_error);
control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
reason);
control_event_bootstrap_problem("foo", reason);
}
}
/* Inform any pending (not attached) circs that they should
* give up. */
@ -1842,13 +1851,15 @@ loop_again:
before = buf_datalen(conn->inbuf);
if (connection_read_to_buf(conn, &max_to_read) < 0) {
/* There's a read error; kill the connection.*/
connection_close_immediate(conn); /* Don't flush; connection is dead. */
if (conn->type == CONN_TYPE_OR)
TO_OR_CONN(conn)->socket_error = tor_socket_errno(conn->s);
if (CONN_IS_EDGE(conn)) {
edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
connection_edge_end_errno(edge_conn);
if (edge_conn->socks_request) /* broken, don't send a socks reply back */
edge_conn->socks_request->has_finished = 1;
}
connection_close_immediate(conn); /* Don't flush; connection is dead. */
connection_mark_for_close(conn);
return -1;
}
@ -2145,14 +2156,11 @@ connection_handle_write(connection_t *conn, int force)
log_info(LD_NET,"in-progress connect failed. Removing.");
if (CONN_IS_EDGE(conn))
connection_edge_end_errno(TO_EDGE_CONN(conn));
if (conn->type == CONN_TYPE_OR)
TO_OR_CONN(conn)->socket_error = e;
connection_close_immediate(conn);
connection_mark_for_close(conn);
/* it's safe to pass OPs to router_set_status(), since it just
* ignores unrecognized routers
*/
if (conn->type == CONN_TYPE_OR && !get_options()->HttpsProxy)
router_set_status(TO_OR_CONN(conn)->identity_digest, 0);
return -1;
} else {
return 0; /* no change, see if next time is better */

View File

@ -546,6 +546,8 @@ connection_or_connect(uint32_t addr, uint16_t port, const char *id_digest)
}
control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED,
END_OR_CONN_REASON_TCP_REFUSED);
/* XXX connection_connect() can fail for all sorts of other reasons */
control_event_bootstrap_problem("foo", END_OR_CONN_REASON_TCP_REFUSED);
connection_free(TO_CONN(conn));
return NULL;
case 0:
@ -795,6 +797,7 @@ connection_or_check_valid_tls_handshake(or_connection_t *conn,
router_set_status(conn->identity_digest, 0);
control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED,
END_OR_CONN_REASON_OR_IDENTITY);
control_event_bootstrap_problem("foo", END_OR_CONN_REASON_OR_IDENTITY);
as_advertised = 0;
}
if (authdir_mode_tests_reachability(options)) {

View File

@ -3825,7 +3825,7 @@ init_cookie_authentication(int enabled)
/** Convert the name of a bootstrapping phase <b>s</b> into strings
* <b>tag</b> and <b>summary</b> suitable for display by the controller. */
static void
static int
bootstrap_status_to_string(bootstrap_status_t s, const char **tag,
const char **summary)
{
@ -3887,48 +3887,94 @@ bootstrap_status_to_string(bootstrap_status_t s, const char **tag,
*summary = "Done";
break;
default:
log_warn(LD_BUG, "Unrecognized bootstrap status code %d", s);
// log_warn(LD_BUG, "Unrecognized bootstrap status code %d", s);
*tag = *summary = "unknown";
return -1;
}
return 0;
}
/** What percentage through the bootstrap process are we? We remember
 * this so we can avoid sending redundant bootstrap status events, and
 * so we can guess context for the bootstrap messages which are
 * ambiguous.
 *
 * control_event_bootstrap() raises this to the latest milestone status
 * or to the incremental progress value, whichever is larger, and stops
 * doing any work once it reaches 100. */
static int bootstrap_percent = 0;
/** How many problems have we had getting to the next bootstrapping phase?
 * These include failure to establish a connection to a Tor relay,
 * failures to finish the TLS handshake, failures to validate the
 * consensus document, etc.
 *
 * Reset to zero by control_event_bootstrap() whenever progress is
 * reported. */
static int bootstrap_problems = 0;
/** We only tell the controller once we've hit a threshold of problems
 * for the current phase.
 *
 * NOTE(review): the comparison against this threshold in
 * control_event_bootstrap_problem() is commented out at present, so
 * every problem is reported immediately. */
#define BOOTSTRAP_PROBLEM_THRESHOLD 10
/** Called when Tor has made progress at bootstrapping its directory
* information and initial circuits. <b>status</b> is the new status,
* that is, what task we will be doing next. <b>percent</b> is zero if
* we just started this task, else it represents progress on the task.
*/
int
control_event_bootstrap(bootstrap_status_t status, int percent)
* information and initial circuits.
*
* <b>status</b> is the new status, that is, what task we will be doing
* next. <b>percent</b> is zero if we just started this task, else it
* represents progress on the task. */
void
control_event_bootstrap(bootstrap_status_t status, int progress)
{
static int last_percent = 0;
const char *tag, *summary;
if (last_percent == 100)
return 0; /* already bootstrapped; nothing to be done here. */
if (bootstrap_percent == 100)
return; /* already bootstrapped; nothing to be done here. */
/* special case for handshaking status, since our TLS handshaking code
* can't distinguish what the connection is going to be for. */
if (status == BOOTSTRAP_STATUS_HANDSHAKE) {
if (last_percent < BOOTSTRAP_STATUS_CONN_OR) {
if (bootstrap_percent < BOOTSTRAP_STATUS_CONN_OR) {
status = BOOTSTRAP_STATUS_HANDSHAKE_DIR;
} else {
status = BOOTSTRAP_STATUS_HANDSHAKE_OR;
}
}
if (status > last_percent || (percent && percent > last_percent)) {
if (status > bootstrap_percent ||
(progress && progress > bootstrap_percent)) {
bootstrap_status_to_string(status, &tag, &summary);
log_notice(LD_CONTROL, "Bootstrapped %d%%: %s.",
percent ? percent : status, summary);
progress ? progress : status, summary);
control_event_client_status(LOG_NOTICE,
"BOOTSTRAP PROGRESS=%d TAG=%s SUMMARY=\"%s\"",
percent ? percent : status, tag, summary);
progress ? progress : status, tag, summary);
if (status > bootstrap_percent) {
bootstrap_percent = status; /* new milestone reached */
}
if (progress > bootstrap_percent) {
/* incremental progress within a milestone */
bootstrap_percent = progress;
}
bootstrap_problems = 0; /* Progress! Reset our problem counter. */
}
if (status > last_percent) /* new milestone reached */
last_percent = status ;
if (percent > last_percent) /* incremental progress within a milestone */
last_percent = percent;
return 0;
}
/** Called when Tor has failed to make bootstrapping progress in a way
 * that indicates a problem. <b>warn</b> gives a hint as to why, and
 * <b>reason</b> provides an "or_conn_end_reason" tag.
 *
 * Logs a warning and sends a BOOTSTRAP client status event at warn
 * severity. Both report the current value of bootstrap_percent, together
 * with the tag and summary of the nearest recognized bootstrap phase at
 * or below that percentage.
 */
void
control_event_bootstrap_problem(const char *warn, int reason)
{
  int milestone = bootstrap_percent;
  const char *tag = "unknown", *summary = "unknown";
  const char *reason_str;

  /* XXX The problem threshold is disabled for now, so every problem is
   * reported right away. The intended check is:
   *   if (++bootstrap_problems != BOOTSTRAP_PROBLEM_THRESHOLD)
   *     return;   (no worries yet)
   */

  /* bootstrap_percent may lie between two phases (incremental progress),
   * so walk downwards until we find a phase that has a tag and summary.
   * Stop at zero: without the bound, an unrecognized value would make
   * this loop decrement forever. */
  while (milestone >= 0 &&
         bootstrap_status_to_string(milestone, &tag, &summary) < 0)
    milestone--;

  reason_str = or_conn_end_reason_to_string(reason);

  /* Report the real percentage in both places; <b>milestone</b> was only
   * a lookup key, and printing it in the log would disagree with the
   * PROGRESS value sent to the controller. */
  log_warn(LD_CONTROL, "Problem bootstrapping. Stuck at %d%%: %s. (%s; %s)",
           bootstrap_percent, summary, warn, reason_str);
  control_event_client_status(LOG_WARN,
      "BOOTSTRAP PROGRESS=%d TAG=%s SUMMARY=\"%s\" WARNING=\"%s\" %s",
      bootstrap_percent, tag, summary, warn, reason_str);
}

View File

@ -2186,7 +2186,8 @@ note_client_request(int purpose, int compressed, size_t bytes)
case DIR_PURPOSE_FETCH_CONSENSUS: kind = "dl/consensus"; break;
case DIR_PURPOSE_FETCH_CERTIFICATE: kind = "dl/cert"; break;
case DIR_PURPOSE_FETCH_STATUS_VOTE: kind = "dl/vote"; break;
case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: kind = "dl/detached_sig"; break;
case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: kind = "dl/detached_sig";
break;
case DIR_PURPOSE_FETCH_SERVERDESC: kind = "dl/server"; break;
case DIR_PURPOSE_FETCH_EXTRAINFO: kind = "dl/extra"; break;
case DIR_PURPOSE_UPLOAD_DIR: kind = "dl/ul-dir"; break;

View File

@ -905,6 +905,9 @@ typedef struct or_connection_t {
tor_tls_t *tls; /**< TLS connection state. */
int tls_error; /**< Last tor_tls error code. */
/* XXX either merge this with tls_error, or do all our activity right
* when we compute this value so we don't have to store it. */
int socket_error; /**< If conn dies, remember why. */
/** When we last used this conn for any client traffic. If not
* recent, we can rate limit it further. */
time_t client_used;
@ -3035,7 +3038,8 @@ typedef enum {
BOOTSTRAP_STATUS_DONE=100
} bootstrap_status_t;
int control_event_bootstrap(bootstrap_status_t status, int percent);
void control_event_bootstrap(bootstrap_status_t status, int progress);
void control_event_bootstrap_problem(const char *warn, int reason);
#ifdef CONTROL_PRIVATE
/* Used only by control.c and test.c */