Merge branch 'feature3116_squashed'

This commit is contained in:
Nick Mathewson 2011-07-11 16:16:11 -04:00
commit e97eb6f254
13 changed files with 334 additions and 102 deletions

5
changes/feature3116 Normal file
View File

@ -0,0 +1,5 @@
o Major features:
- While we're trying to bootstrap, record how many TLS connections
fail in each state, and report which states saw the most failures
in response to any bootstrap failures. This feature may speed up
diagnosis of censorship events.

View File

@ -0,0 +1,3 @@
o Code simplification and refactoring:
- Split connection_about_to_close into separate functions for each
connection type.

View File

@ -222,6 +222,40 @@ ssl_state_to_string(int ssl_state)
return buf;
}
/** Write a description of the current state of <b>tls</b> into the
* <b>sz</b>-byte buffer at <b>buf</b>. */
void
tor_tls_get_state_description(tor_tls_t *tls, char *buf, size_t sz)
{
const char *ssl_state;
const char *tortls_state;
if (PREDICT_UNLIKELY(!tls || !tls->ssl)) {
strlcpy(buf, "(No SSL object)", sz);
return;
}
ssl_state = ssl_state_to_string(tls->ssl->state);
switch (tls->state) {
#define CASE(st) case TOR_TLS_ST_##st: tortls_state = " in "#st ; break
CASE(HANDSHAKE);
CASE(OPEN);
CASE(GOTCLOSE);
CASE(SENTCLOSE);
CASE(CLOSED);
CASE(RENEGOTIATE);
#undef CASE
case TOR_TLS_ST_BUFFEREVENT:
tortls_state = "";
break;
default:
tortls_state = " in unknown TLS state";
break;
}
tor_snprintf(buf, sz, "%s%s", ssl_state, tortls_state);
}
void
tor_tls_log_one_error(tor_tls_t *tls, unsigned long err,
int severity, int domain, const char *doing)

View File

@ -48,6 +48,7 @@ typedef struct tor_tls_t tor_tls_t;
#define TOR_TLS_IS_ERROR(rv) ((rv) < TOR_TLS_CLOSE)
const char *tor_tls_err_to_string(int err);
void tor_tls_get_state_description(tor_tls_t *tls, char *buf, size_t sz);
void tor_tls_free_all(void);
int tor_tls_context_init(int is_public_server,

View File

@ -2117,6 +2117,7 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
"Looks like client functionality is working.");
control_event_bootstrap(BOOTSTRAP_STATUS_DONE, 0);
control_event_client_status(LOG_NOTICE, "CIRCUIT_ESTABLISHED");
clear_broken_connection_map(1);
if (server_mode(options) && !check_whether_orport_reachable()) {
inform_testing_reachability();
consider_testing_reachability(1, 1);

View File

@ -554,6 +554,9 @@ connection_free_all(void)
/* Unlink everything from the identity map. */
connection_or_clear_identity_map();
/* Clear out our list of broken connections */
clear_broken_connection_map(0);
SMARTLIST_FOREACH(conns, connection_t *, conn, _connection_free(conn));
if (outgoing_addrs) {
@ -563,7 +566,9 @@ connection_free_all(void)
}
}
/** Do any cleanup needed:
/**
* Called when we're about to finally unlink and free a connection:
* perform necessary accounting and cleanup
* - Directory conns that failed to fetch a rendezvous descriptor
* need to inform pending rendezvous streams.
* - OR conns need to call rep_hist_note_*() to record status.
@ -576,114 +581,20 @@ connection_free_all(void)
void
connection_about_to_close_connection(connection_t *conn)
{
circuit_t *circ;
dir_connection_t *dir_conn;
or_connection_t *or_conn;
edge_connection_t *edge_conn;
time_t now = time(NULL);
tor_assert(conn->marked_for_close);
if (CONN_IS_EDGE(conn)) {
edge_conn = TO_EDGE_CONN(conn);
if (!edge_conn->edge_has_sent_end) {
log_warn(LD_BUG, "(Harmless.) Edge connection (marked at %s:%d) "
"hasn't sent end yet?",
conn->marked_for_close_file, conn->marked_for_close);
tor_fragile_assert();
}
}
switch (conn->type) {
case CONN_TYPE_DIR:
dir_conn = TO_DIR_CONN(conn);
if (conn->state < DIR_CONN_STATE_CLIENT_FINISHED) {
/* It's a directory connection and connecting or fetching
* failed: forget about this router, and maybe try again. */
connection_dir_request_failed(dir_conn);
}
/* If we were trying to fetch a v2 rend desc and did not succeed,
* retry as needed. (If a fetch is successful, the connection state
* is changed to DIR_PURPOSE_HAS_FETCHED_RENDDESC to mark that
* refetching is unnecessary.) */
if (conn->purpose == DIR_PURPOSE_FETCH_RENDDESC_V2 &&
dir_conn->rend_data &&
strlen(dir_conn->rend_data->onion_address) ==
REND_SERVICE_ID_LEN_BASE32)
rend_client_refetch_v2_renddesc(dir_conn->rend_data);
connection_dir_about_to_close(TO_DIR_CONN(conn));
break;
case CONN_TYPE_OR:
or_conn = TO_OR_CONN(conn);
/* Remember why we're closing this connection. */
if (conn->state != OR_CONN_STATE_OPEN) {
/* Inform any pending (not attached) circs that they should
* give up. */
circuit_n_conn_done(TO_OR_CONN(conn), 0);
/* now mark things down as needed */
if (connection_or_nonopen_was_started_here(or_conn)) {
const or_options_t *options = get_options();
rep_hist_note_connect_failed(or_conn->identity_digest, now);
entry_guard_register_connect_status(or_conn->identity_digest,0,
!options->HTTPSProxy, now);
if (conn->state >= OR_CONN_STATE_TLS_HANDSHAKING) {
int reason = tls_error_to_orconn_end_reason(or_conn->tls_error);
control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
reason);
if (!authdir_mode_tests_reachability(options))
control_event_bootstrap_problem(
orconn_end_reason_to_control_string(reason), reason);
}
}
} else if (conn->hold_open_until_flushed) {
/* We only set hold_open_until_flushed when we're intentionally
* closing a connection. */
rep_hist_note_disconnect(or_conn->identity_digest, now);
control_event_or_conn_status(or_conn, OR_CONN_EVENT_CLOSED,
tls_error_to_orconn_end_reason(or_conn->tls_error));
} else if (!tor_digest_is_zero(or_conn->identity_digest)) {
rep_hist_note_connection_died(or_conn->identity_digest, now);
control_event_or_conn_status(or_conn, OR_CONN_EVENT_CLOSED,
tls_error_to_orconn_end_reason(or_conn->tls_error));
}
/* Now close all the attached circuits on it. */
circuit_unlink_all_from_or_conn(TO_OR_CONN(conn),
END_CIRC_REASON_OR_CONN_CLOSED);
connection_or_about_to_close(TO_OR_CONN(conn));
break;
case CONN_TYPE_AP:
edge_conn = TO_EDGE_CONN(conn);
if (edge_conn->socks_request->has_finished == 0) {
/* since conn gets removed right after this function finishes,
* there's no point trying to send back a reply at this point. */
log_warn(LD_BUG,"Closing stream (marked at %s:%d) without sending"
" back a socks reply.",
conn->marked_for_close_file, conn->marked_for_close);
}
if (!edge_conn->end_reason) {
log_warn(LD_BUG,"Closing stream (marked at %s:%d) without having"
" set end_reason.",
conn->marked_for_close_file, conn->marked_for_close);
}
if (edge_conn->dns_server_request) {
log_warn(LD_BUG,"Closing stream (marked at %s:%d) without having"
" replied to DNS request.",
conn->marked_for_close_file, conn->marked_for_close);
dnsserv_reject_request(edge_conn);
}
control_event_stream_bandwidth(edge_conn);
control_event_stream_status(edge_conn, STREAM_EVENT_CLOSED,
edge_conn->end_reason);
circ = circuit_get_by_edge_conn(edge_conn);
if (circ)
circuit_detach_stream(circ, edge_conn);
connection_ap_about_to_close(TO_EDGE_CONN(conn));
break;
case CONN_TYPE_EXIT:
edge_conn = TO_EDGE_CONN(conn);
circ = circuit_get_by_edge_conn(edge_conn);
if (circ)
circuit_detach_stream(circ, edge_conn);
if (conn->state == EXIT_CONN_STATE_RESOLVING) {
connection_dns_remove(edge_conn);
}
connection_exit_about_to_close(TO_EDGE_CONN(conn));
break;
}
}

View File

@ -408,6 +408,71 @@ connection_edge_finished_connecting(edge_connection_t *edge_conn)
return connection_edge_process_inbuf(edge_conn, 1);
}
/** Common code to connection_(ap|exit)_about_to_close. */
static void
connection_edge_about_to_close(edge_connection_t *edge_conn)
{
if (!edge_conn->edge_has_sent_end) {
connection_t *conn = TO_CONN(edge_conn);
log_warn(LD_BUG, "(Harmless.) Edge connection (marked at %s:%d) "
"hasn't sent end yet?",
conn->marked_for_close_file, conn->marked_for_close);
tor_fragile_assert();
}
}
/* Called when we're about to finally unlink and free an AP (client)
* connection: perform necessary accounting and cleanup */
void
connection_ap_about_to_close(edge_connection_t *edge_conn)
{
circuit_t *circ;
connection_t *conn = TO_CONN(edge_conn);
if (edge_conn->socks_request->has_finished == 0) {
/* since conn gets removed right after this function finishes,
* there's no point trying to send back a reply at this point. */
log_warn(LD_BUG,"Closing stream (marked at %s:%d) without sending"
" back a socks reply.",
conn->marked_for_close_file, conn->marked_for_close);
}
if (!edge_conn->end_reason) {
log_warn(LD_BUG,"Closing stream (marked at %s:%d) without having"
" set end_reason.",
conn->marked_for_close_file, conn->marked_for_close);
}
if (edge_conn->dns_server_request) {
log_warn(LD_BUG,"Closing stream (marked at %s:%d) without having"
" replied to DNS request.",
conn->marked_for_close_file, conn->marked_for_close);
dnsserv_reject_request(edge_conn);
}
control_event_stream_bandwidth(edge_conn);
control_event_stream_status(edge_conn, STREAM_EVENT_CLOSED,
edge_conn->end_reason);
circ = circuit_get_by_edge_conn(edge_conn);
if (circ)
circuit_detach_stream(circ, edge_conn);
}
/* Called when we're about to finally unlink and free an exit
* connection: perform necessary accounting and cleanup */
void
connection_exit_about_to_close(edge_connection_t *edge_conn)
{
circuit_t *circ;
connection_t *conn = TO_CONN(edge_conn);
connection_edge_about_to_close(edge_conn);
circ = circuit_get_by_edge_conn(edge_conn);
if (circ)
circuit_detach_stream(circ, edge_conn);
if (conn->state == EXIT_CONN_STATE_RESOLVING) {
connection_dns_remove(edge_conn);
}
}
/** Define a schedule for how long to wait between retrying
* application connections. Rather than waiting a fixed amount of
* time between each retry, we wait 10 seconds each for the first

View File

@ -27,6 +27,9 @@ int connection_edge_flushed_some(edge_connection_t *conn);
int connection_edge_finished_flushing(edge_connection_t *conn);
int connection_edge_finished_connecting(edge_connection_t *conn);
void connection_ap_about_to_close(edge_connection_t *edge_conn);
void connection_exit_about_to_close(edge_connection_t *edge_conn);
int connection_ap_handshake_send_begin(edge_connection_t *ap_conn);
int connection_ap_handshake_send_resolve(edge_connection_t *ap_conn);

View File

@ -13,6 +13,7 @@
#include "or.h"
#include "buffers.h"
#include "circuitbuild.h"
#include "circuitlist.h"
#include "command.h"
#include "config.h"
#include "connection.h"
@ -149,6 +150,136 @@ connection_or_set_identity_digest(or_connection_t *conn, const char *digest)
#endif
}
/**************************************************************/
/** Map from a string describing what a non-open OR connection was doing when
* failed, to an intptr_t describing the count of connections that failed that
* way. Note that the count is stored _as_ the pointer.
*/
static strmap_t *broken_connection_counts;
/** If true, do not record information in <b>broken_connection_counts</b>. */
static int disable_broken_connection_counts = 0;
/** Record that an OR connection failed in <b>state</b>. */
static void
note_broken_connection(const char *state)
{
void *ptr;
intptr_t val;
if (disable_broken_connection_counts)
return;
if (!broken_connection_counts)
broken_connection_counts = strmap_new();
ptr = strmap_get(broken_connection_counts, state);
val = (intptr_t)ptr;
val++;
ptr = (void*)val;
strmap_set(broken_connection_counts, state, ptr);
}
/** Forget all recorded states for failed connections. If
* <b>stop_recording</b> is true, don't record any more. */
void
clear_broken_connection_map(int stop_recording)
{
if (broken_connection_counts)
strmap_free(broken_connection_counts, NULL);
broken_connection_counts = NULL;
if (stop_recording)
disable_broken_connection_counts = 1;
}
/** Write a detailed description the state of <b>orconn</b> into the
* <b>buflen</b>-byte buffer at <b>buf</b>. This description includes not
* only the OR-conn level state but also the TLS state. It's useful for
* diagnosing broken handshakes. */
static void
connection_or_get_state_description(or_connection_t *orconn,
char *buf, size_t buflen)
{
connection_t *conn = TO_CONN(orconn);
const char *conn_state;
char tls_state[256];
tor_assert(conn->type == CONN_TYPE_OR);
conn_state = conn_state_to_string(conn->type, conn->state);
tor_tls_get_state_description(orconn->tls, tls_state, sizeof(tls_state));
tor_snprintf(buf, buflen, "%s with SSL state %s", conn_state, tls_state);
}
/** Record the current state of <b>orconn</b> as the state of a broken
* connection. */
static void
connection_or_note_state_when_broken(or_connection_t *orconn)
{
char buf[256];
if (disable_broken_connection_counts)
return;
connection_or_get_state_description(orconn, buf, sizeof(buf));
log_info(LD_HANDSHAKE,"Connection died in state '%s'", buf);
note_broken_connection(buf);
}
/** Helper type used to sort connection states and find the most frequent. */
typedef struct broken_state_count_t {
intptr_t count;
const char *state;
} broken_state_count_t;
/** Helper function used to sort broken_state_count_t by frequency. */
static int
broken_state_count_compare(const void **a_ptr, const void **b_ptr)
{
const broken_state_count_t *a = *a_ptr, *b = *b_ptr;
return b->count - a->count;
}
/** Upper limit on the number of different states to report for connection
* failure. */
#define MAX_REASONS_TO_REPORT 10
/** Report a list of the top states for failed OR connections at log level
* <b>severity</b>, in log domain <b>domain</b>. */
void
connection_or_report_broken_states(int severity, int domain)
{
int total = 0;
smartlist_t *items;
if (!broken_connection_counts || disable_broken_connection_counts)
return;
items = smartlist_create();
STRMAP_FOREACH(broken_connection_counts, state, void *, countptr) {
broken_state_count_t *c = tor_malloc(sizeof(broken_state_count_t));
total += c->count = (intptr_t)countptr;
c->state = state;
smartlist_add(items, c);
} STRMAP_FOREACH_END;
smartlist_sort(items, broken_state_count_compare);
log(severity, domain, "%d connections have failed%s", total,
smartlist_len(items) > MAX_REASONS_TO_REPORT ? ". Top reasons:" : ":");
SMARTLIST_FOREACH_BEGIN(items, const broken_state_count_t *, c) {
if (c_sl_idx > MAX_REASONS_TO_REPORT)
break;
log(severity, domain,
" %d connections died in state %s", (int)c->count, c->state);
} SMARTLIST_FOREACH_END(c);
SMARTLIST_FOREACH(items, broken_state_count_t *, c, tor_free(c));
smartlist_free(items);
}
/**************************************************************/
/** Pack the cell_t host-order structure <b>src</b> into network-order
* in the buffer <b>dest</b>. See tor-spec.txt for details about the
* wire format.
@ -347,6 +478,51 @@ connection_or_finished_connecting(or_connection_t *or_conn)
return 0;
}
/* Called when we're about to finally unlink and free an OR connection:
* perform necessary accounting and cleanup */
void
connection_or_about_to_close(or_connection_t *or_conn)
{
time_t now = time(NULL);
connection_t *conn = TO_CONN(or_conn);
/* Remember why we're closing this connection. */
if (conn->state != OR_CONN_STATE_OPEN) {
/* Inform any pending (not attached) circs that they should
* give up. */
circuit_n_conn_done(TO_OR_CONN(conn), 0);
/* now mark things down as needed */
if (connection_or_nonopen_was_started_here(or_conn)) {
const or_options_t *options = get_options();
connection_or_note_state_when_broken(or_conn);
rep_hist_note_connect_failed(or_conn->identity_digest, now);
entry_guard_register_connect_status(or_conn->identity_digest,0,
!options->HTTPSProxy, now);
if (conn->state >= OR_CONN_STATE_TLS_HANDSHAKING) {
int reason = tls_error_to_orconn_end_reason(or_conn->tls_error);
control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
reason);
if (!authdir_mode_tests_reachability(options))
control_event_bootstrap_problem(
orconn_end_reason_to_control_string(reason), reason);
}
}
} else if (conn->hold_open_until_flushed) {
/* We only set hold_open_until_flushed when we're intentionally
* closing a connection. */
rep_hist_note_disconnect(or_conn->identity_digest, now);
control_event_or_conn_status(or_conn, OR_CONN_EVENT_CLOSED,
tls_error_to_orconn_end_reason(or_conn->tls_error));
} else if (!tor_digest_is_zero(or_conn->identity_digest)) {
rep_hist_note_connection_died(or_conn->identity_digest, now);
control_event_or_conn_status(or_conn, OR_CONN_EVENT_CLOSED,
tls_error_to_orconn_end_reason(or_conn->tls_error));
}
/* Now close all the attached circuits on it. */
circuit_unlink_all_from_or_conn(TO_OR_CONN(conn),
END_CIRC_REASON_OR_CONN_CLOSED);
}
/** Return 1 if identity digest <b>id_digest</b> is known to be a
* currently or recently running relay. Otherwise return 0. */
int

View File

@ -14,6 +14,7 @@
void connection_or_remove_from_identity_map(or_connection_t *conn);
void connection_or_clear_identity_map(void);
void clear_broken_connection_map(int disable);
or_connection_t *connection_or_get_for_extend(const char *digest,
const tor_addr_t *target_addr,
const char **msg_out,
@ -25,6 +26,7 @@ int connection_or_process_inbuf(or_connection_t *conn);
int connection_or_flushed_some(or_connection_t *conn);
int connection_or_finished_flushing(or_connection_t *conn);
int connection_or_finished_connecting(or_connection_t *conn);
void connection_or_about_to_close(or_connection_t *conn);
int connection_or_digest_is_known_relay(const char *id_digest);
void connection_or_update_token_buckets(smartlist_t *conns,
const or_options_t *options);
@ -34,6 +36,8 @@ void connection_or_connect_failed(or_connection_t *conn,
or_connection_t *connection_or_connect(const tor_addr_t *addr, uint16_t port,
const char *id_digest);
void connection_or_report_broken_states(int severity, int domain);
int connection_tls_start_handshake(or_connection_t *conn, int receiving);
int connection_tls_continue_handshake(or_connection_t *conn);

View File

@ -18,6 +18,7 @@
#include "config.h"
#include "connection.h"
#include "connection_edge.h"
#include "connection_or.h"
#include "control.h"
#include "directory.h"
#include "dirserv.h"
@ -4268,6 +4269,7 @@ control_event_bootstrap_problem(const char *warn, int reason)
const char *tag, *summary;
char buf[BOOTSTRAP_MSG_LEN];
const char *recommendation = "ignore";
int severity;
/* bootstrap_percent must not be in "undefined" state here. */
tor_assert(status >= 0);
@ -4292,12 +4294,17 @@ control_event_bootstrap_problem(const char *warn, int reason)
status--; /* find a recognized status string based on current progress */
status = bootstrap_percent; /* set status back to the actual number */
log_fn(!strcmp(recommendation, "warn") ? LOG_WARN : LOG_INFO,
severity = !strcmp(recommendation, "warn") ? LOG_WARN : LOG_INFO;
log_fn(severity,
LD_CONTROL, "Problem bootstrapping. Stuck at %d%%: %s. (%s; %s; "
"count %d; recommendation %s)",
status, summary, warn,
orconn_end_reason_to_control_string(reason),
bootstrap_problems, recommendation);
connection_or_report_broken_states(severity, LD_HANDSHAKE);
tor_snprintf(buf, sizeof(buf),
"BOOTSTRAP PROGRESS=%d TAG=%s SUMMARY=\"%s\" WARNING=\"%s\" REASON=%s "
"COUNT=%d RECOMMENDATION=%s",

View File

@ -645,7 +645,7 @@ directory_conn_is_self_reachability_test(dir_connection_t *conn)
* server due to a network error: Mark the router as down and try again if
* possible.
*/
void
static void
connection_dir_request_failed(dir_connection_t *conn)
{
if (directory_conn_is_self_reachability_test(conn)) {
@ -2301,6 +2301,28 @@ connection_dir_process_inbuf(dir_connection_t *conn)
return 0;
}
/** Called when we're about to finally unlink and free a directory connection:
* perform necessary accounting and cleanup */
void
connection_dir_about_to_close(dir_connection_t *dir_conn)
{
connection_t *conn = TO_CONN(dir_conn);
if (conn->state < DIR_CONN_STATE_CLIENT_FINISHED) {
/* It's a directory connection and connecting or fetching
* failed: forget about this router, and maybe try again. */
connection_dir_request_failed(dir_conn);
}
/* If we were trying to fetch a v2 rend desc and did not succeed,
* retry as needed. (If a fetch is successful, the connection state
* is changed to DIR_PURPOSE_HAS_FETCHED_RENDDESC to mark that
* refetching is unnecessary.) */
if (conn->purpose == DIR_PURPOSE_FETCH_RENDDESC_V2 &&
dir_conn->rend_data &&
strlen(dir_conn->rend_data->onion_address) == REND_SERVICE_ID_LEN_BASE32)
rend_client_refetch_v2_renddesc(dir_conn->rend_data);
}
/** Create an http response for the client <b>conn</b> out of
* <b>status</b> and <b>reason_phrase</b>. Write it to <b>conn</b>.
*/

View File

@ -48,7 +48,7 @@ int connection_dir_reached_eof(dir_connection_t *conn);
int connection_dir_process_inbuf(dir_connection_t *conn);
int connection_dir_finished_flushing(dir_connection_t *conn);
int connection_dir_finished_connecting(dir_connection_t *conn);
void connection_dir_request_failed(dir_connection_t *conn);
void connection_dir_about_to_close(dir_connection_t *dir_conn);
void directory_initiate_command(const char *address, const tor_addr_t *addr,
uint16_t or_port, uint16_t dir_port,
int supports_conditional_consensus,