diff --git a/changes/bug24767 b/changes/bug24767 new file mode 100644 index 0000000000..56fbe51a98 --- /dev/null +++ b/changes/bug24767 @@ -0,0 +1,5 @@ + o Major bugfixes (relay, connection): + - Refuse to connect again to a relay from which we failed previously with + a connection refused, timeout or error (at the TCP level). The relay + won't be retried for 60 seconds after the failure occured. Fixes bug + 24767; bugfix on 0.0.6. diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 272a086a32..267463312c 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -28,6 +28,7 @@ * part of a subclass (channel_tls_t). */ #define TOR_CHANNEL_INTERNAL_ +#define CONNECTION_OR_PRIVATE #include "channel.h" #include "channeltls.h" #include "circuitbuild.h" @@ -1122,6 +1123,216 @@ connection_or_group_set_badness_(smartlist_t *group, int force) } SMARTLIST_FOREACH_END(or_conn); } +/* Lifetime of a connection failure. After that, we'll retry. This is in + * seconds. */ +#define OR_CONNECT_FAILURE_LIFETIME 60 +/* The interval to use with when to clean up the failure cache. */ +#define OR_CONNECT_FAILURE_CLEANUP_INTERVAL 60 + +/* When is the next time we have to cleanup the failure map. We keep this + * because we clean it opportunistically. */ +static time_t or_connect_failure_map_next_cleanup_ts = 0; + +/* OR connection failure entry data structure. It is kept in the connection + * failure map defined below and indexed by OR identity digest, address and + * port. + * + * We need to identify a connection failure with these three values because we + * want to avoid to wrongfully blacklist a relay if someone is trying to + * extend to a known identity digest but with the wrong IP/port. For instance, + * it can happen if a relay changed its port but the client still has an old + * descriptor with the old port. We want to stop connecting to that + * IP/port/identity all together, not only the relay identity. */ +typedef struct or_connect_failure_entry_t { + HT_ENTRY(or_connect_failure_entry_t) node; + /* Identity digest of the connection where it is connecting to. */ + uint8_t identity_digest[DIGEST_LEN]; + /* This is the connection address from the base connection_t. After the + * connection is checked for canonicity, the base address should represent + * what we know instead of where we are connecting to. This is what we need + * so we can correlate known relays within the consensus. */ + tor_addr_t addr; + uint16_t port; + /* Last time we were unable to connect. */ + time_t last_failed_connect_ts; +} or_connect_failure_entry_t; + +/* Map where we keep connection failure entries. They are indexed by addr, + * port and identity digest. */ +static HT_HEAD(or_connect_failure_ht, or_connect_failure_entry_t) + or_connect_failures_map = HT_INITIALIZER(); + +/* Helper: Hashtable equal function. Return 1 if equal else 0. */ +static int +or_connect_failure_ht_eq(const or_connect_failure_entry_t *a, + const or_connect_failure_entry_t *b) +{ + return fast_memeq(a->identity_digest, b->identity_digest, DIGEST_LEN) && + tor_addr_eq(&a->addr, &b->addr) && + a->port == b->port; +} + +/* Helper: Return the hash for the hashtable of the given entry. For this + * table, it is a combination of address, port and identity digest. */ +static unsigned int +or_connect_failure_ht_hash(const or_connect_failure_entry_t *entry) +{ + size_t offset = 0, addr_size; + const void *addr_ptr; + /* Largest size is IPv6 and IPv4 is smaller so it is fine. */ + uint8_t data[16 + sizeof(uint16_t) + DIGEST_LEN]; + + /* Get the right address bytes depending on the family. */ + switch (tor_addr_family(&entry->addr)) { + case AF_INET: + addr_size = 4; + addr_ptr = &entry->addr.addr.in_addr.s_addr; + break; + case AF_INET6: + addr_size = 16; + addr_ptr = &entry->addr.addr.in6_addr.s6_addr; + break; + default: + tor_assert_nonfatal_unreached(); + return 0; + } + + memcpy(data, addr_ptr, addr_size); + offset += addr_size; + memcpy(data + offset, entry->identity_digest, DIGEST_LEN); + offset += DIGEST_LEN; + set_uint16(data + offset, entry->port); + offset += sizeof(uint16_t); + + return (unsigned int) siphash24g(data, offset); +} + +HT_PROTOTYPE(or_connect_failure_ht, or_connect_failure_entry_t, node, + or_connect_failure_ht_hash, or_connect_failure_ht_eq) + +HT_GENERATE2(or_connect_failure_ht, or_connect_failure_entry_t, node, + or_connect_failure_ht_hash, or_connect_failure_ht_eq, + 0.6, tor_reallocarray_, tor_free_) + +/* Initialize a given connect failure entry with the given identity_digest, + * addr and port. All field are optional except ocf. */ +static void +or_connect_failure_init(const char *identity_digest, const tor_addr_t *addr, + uint16_t port, or_connect_failure_entry_t *ocf) +{ + tor_assert(ocf); + if (identity_digest) { + memcpy(ocf->identity_digest, identity_digest, + sizeof(ocf->identity_digest)); + } + if (addr) { + tor_addr_copy(&ocf->addr, addr); + } + ocf->port = port; +} + +/* Return a newly allocated connection failure entry. It is initialized with + * the given or_conn data. This can't fail. */ +static or_connect_failure_entry_t * +or_connect_failure_new(const or_connection_t *or_conn) +{ + or_connect_failure_entry_t *ocf = tor_malloc_zero(sizeof(*ocf)); + or_connect_failure_init(or_conn->identity_digest, &or_conn->real_addr, + TO_CONN(or_conn)->port, ocf); + return ocf; +} + +/* Return a connection failure entry matching the given or_conn. NULL is + * returned if not found. */ +static or_connect_failure_entry_t * +or_connect_failure_find(const or_connection_t *or_conn) +{ + or_connect_failure_entry_t lookup; + tor_assert(or_conn); + or_connect_failure_init(or_conn->identity_digest, &TO_CONN(or_conn)->addr, + TO_CONN(or_conn)->port, &lookup); + return HT_FIND(or_connect_failure_ht, &or_connect_failures_map, &lookup); +} + +/* Note down in the connection failure cache that a failure occurred on the + * given or_conn. */ +STATIC void +note_or_connect_failed(const or_connection_t *or_conn) +{ + or_connect_failure_entry_t *ocf = NULL; + + tor_assert(or_conn); + + ocf = or_connect_failure_find(or_conn); + if (ocf == NULL) { + ocf = or_connect_failure_new(or_conn); + HT_INSERT(or_connect_failure_ht, &or_connect_failures_map, ocf); + } + ocf->last_failed_connect_ts = approx_time(); +} + +/* Cleanup the connection failure cache and remove all entries below the + * given cutoff. */ +static void +or_connect_failure_map_cleanup(time_t cutoff) +{ + or_connect_failure_entry_t **ptr, **next, *entry; + + for (ptr = HT_START(or_connect_failure_ht, &or_connect_failures_map); + ptr != NULL; ptr = next) { + entry = *ptr; + if (entry->last_failed_connect_ts <= cutoff) { + next = HT_NEXT_RMV(or_connect_failure_ht, &or_connect_failures_map, ptr); + tor_free(entry); + } else { + next = HT_NEXT(or_connect_failure_ht, &or_connect_failures_map, ptr); + } + } +} + +/* Return true iff the given OR connection can connect to its destination that + * is the triplet identity_digest, address and port. + * + * The or_conn MUST have gone through connection_or_check_canonicity() so the + * base address is properly set to what we know or doesn't know. */ +STATIC int +should_connect_to_relay(const or_connection_t *or_conn) +{ + time_t now, cutoff; + time_t connect_failed_since_ts = 0; + or_connect_failure_entry_t *ocf; + + tor_assert(or_conn); + + now = approx_time(); + cutoff = now - OR_CONNECT_FAILURE_LIFETIME; + + /* Opportunistically try to cleanup the failure cache. We do that at regular + * interval so it doesn't grow too big. */ + if (or_connect_failure_map_next_cleanup_ts <= now) { + or_connect_failure_map_cleanup(cutoff); + or_connect_failure_map_next_cleanup_ts = + now + OR_CONNECT_FAILURE_CLEANUP_INTERVAL; + } + + /* Look if we have failed previously to the same destination as this + * OR connection. */ + ocf = or_connect_failure_find(or_conn); + if (ocf) { + connect_failed_since_ts = ocf->last_failed_connect_ts; + } + /* If we do have an unable to connect timestamp and it is below cutoff, we + * can connect. Or we have never failed before so let it connect. */ + if (connect_failed_since_ts > cutoff) { + goto no_connect; + } + + /* Ok we can connect! */ + return 1; + no_connect: + return 0; +} + /** conn is in the 'connecting' state, and it failed to complete * a TCP connection. Send notifications appropriately. * @@ -1135,6 +1346,7 @@ connection_or_connect_failed(or_connection_t *conn, control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED, reason); if (!authdir_mode_tests_reachability(get_options())) control_event_bootstrap_prob_or(msg, reason, conn); + note_or_connect_failed(conn); } /** conn got an error in connection_handle_read_impl() or @@ -1225,6 +1437,19 @@ connection_or_connect, (const tor_addr_t *_addr, uint16_t port, conn->chan = chan; chan->conn = conn; connection_or_init_conn_from_address(conn, &addr, port, id_digest, ed_id, 1); + + /* We have a proper OR connection setup, now check if we can connect to it + * that is we haven't had a failure earlier. This is to avoid to try to + * constantly connect to relays that we think are not reachable. */ + if (!should_connect_to_relay(conn)) { + log_info(LD_GENERAL, "Can't connect to identity %s at %s:%u because we " + "failed earlier. Refusing.", + hex_str(id_digest, DIGEST_LEN), fmt_addr(&TO_CONN(conn)->addr), + TO_CONN(conn)->port); + connection_free_(TO_CONN(conn)); + return NULL; + } + connection_or_change_state(conn, OR_CONN_STATE_CONNECTING); control_event_or_conn_status(conn, OR_CONN_EVENT_LAUNCHED, 0); diff --git a/src/or/connection_or.h b/src/or/connection_or.h index 7c1dced631..158eb1fdad 100644 --- a/src/or/connection_or.h +++ b/src/or/connection_or.h @@ -120,6 +120,11 @@ int connection_or_single_set_badness_(time_t now, int force); void connection_or_group_set_badness_(smartlist_t *group, int force); +#ifdef CONNECTION_OR_PRIVATE +STATIC int should_connect_to_relay(const or_connection_t *or_conn); +STATIC void note_or_connect_failed(const or_connection_t *or_conn); +#endif + #ifdef TOR_UNIT_TESTS extern int certs_cell_ed25519_disabled_for_testing; #endif diff --git a/src/or/nodelist.c b/src/or/nodelist.c index 125dd8b9f1..3a26aee611 100644 --- a/src/or/nodelist.c +++ b/src/or/nodelist.c @@ -161,8 +161,8 @@ init_nodelist(void) } /** As node_get_by_id, but returns a non-const pointer */ -node_t * -node_get_mutable_by_id(const char *identity_digest) +MOCK_IMPL(node_t *, +node_get_mutable_by_id,(const char *identity_digest)) { node_t search, *node; if (PREDICT_UNLIKELY(the_nodelist == NULL)) diff --git a/src/or/nodelist.h b/src/or/nodelist.h index dc20eaf0a5..043d7b3414 100644 --- a/src/or/nodelist.h +++ b/src/or/nodelist.h @@ -16,7 +16,7 @@ tor_assert((n)->ri || (n)->rs); \ } STMT_END -node_t *node_get_mutable_by_id(const char *identity_digest); +MOCK_DECL(node_t *, node_get_mutable_by_id,(const char *identity_digest)); MOCK_DECL(const node_t *, node_get_by_id, (const char *identity_digest)); node_t *node_get_mutable_by_ed25519_id(const ed25519_public_key_t *ed_id); MOCK_DECL(const node_t *, node_get_by_ed25519_id, diff --git a/src/test/test_connection.c b/src/test/test_connection.c index 33f453b8b2..dc0f6860d9 100644 --- a/src/test/test_connection.c +++ b/src/test/test_connection.c @@ -5,6 +5,7 @@ #define CONNECTION_PRIVATE #define MAIN_PRIVATE +#define CONNECTION_OR_PRIVATE #include "or.h" #include "test.h" @@ -13,9 +14,11 @@ #include "hs_common.h" #include "main.h" #include "microdesc.h" +#include "nodelist.h" #include "networkstatus.h" #include "rendcache.h" #include "directory.h" +#include "connection_or.h" #include "test_connection.h" #include "test_helpers.h" @@ -776,6 +779,99 @@ test_conn_download_status(void *arg) /* the teardown function removes all the connections in the global list*/; } +static node_t test_node; + +static node_t * +mock_node_get_mutable_by_id(const char *digest) +{ + (void) digest; + static routerinfo_t node_ri; + memset(&node_ri, 0, sizeof(node_ri)); + + test_node.ri = &node_ri; + memset(test_node.identity, 'c', sizeof(test_node.identity)); + + tor_addr_t ipv4_addr; + tor_addr_parse(&ipv4_addr, "18.0.0.1"); + node_ri.addr = tor_addr_to_ipv4h(&ipv4_addr); + node_ri.or_port = 1; + + return &test_node; +} + +static const node_t * +mock_node_get_by_id(const char *digest) +{ + (void) digest; + memset(test_node.identity, 'c', sizeof(test_node.identity)); + return &test_node; +} + +/* Test whether we correctly track failed connections between relays. */ +static void +test_failed_orconn_tracker(void *arg) +{ + (void) arg; + + int can_connect; + time_t now = 1281533250; /* 2010-08-11 13:27:30 UTC */ + (void) now; + + update_approx_time(now); + + /* Prepare the OR connection that will be used in this test */ + or_connection_t or_conn; + tt_int_op(AF_INET,OP_EQ, tor_addr_parse(&or_conn.real_addr, "18.0.0.1")); + tt_int_op(AF_INET,OP_EQ, tor_addr_parse(&or_conn.base_.addr, "18.0.0.1")); + or_conn.base_.port = 1; + memset(or_conn.identity_digest, 'c', sizeof(or_conn.identity_digest)); + + /* Check whether we can connect with an empty failure cache: + * this should succeed */ + can_connect = should_connect_to_relay(&or_conn); + tt_int_op(can_connect, OP_EQ, 1); + + /* Now add the destination to the failure cache */ + note_or_connect_failed(&or_conn); + + /* Check again: now it shouldn't connect */ + can_connect = should_connect_to_relay(&or_conn); + tt_int_op(can_connect, OP_EQ, 0); + + /* Move time forward and check again: the cache should have been cleared and + * now it should connect */ + now += 3600; + update_approx_time(now); + can_connect = should_connect_to_relay(&or_conn); + tt_int_op(can_connect, OP_EQ, 1); + + /* Now mock the node_get_*by_id() functions to start using the node subsystem + * optimization. */ + MOCK(node_get_by_id, mock_node_get_by_id); + MOCK(node_get_mutable_by_id, mock_node_get_mutable_by_id); + + /* Since we just started using the node subsystem it will allow connections + * now */ + can_connect = should_connect_to_relay(&or_conn); + tt_int_op(can_connect, OP_EQ, 1); + + /* Mark it as failed */ + note_or_connect_failed(&or_conn); + + /* Check that it shouldn't connect now */ + can_connect = should_connect_to_relay(&or_conn); + tt_int_op(can_connect, OP_EQ, 0); + + /* Move time forward and check again: now it should connect */ + now += 3600; + update_approx_time(now); + can_connect = should_connect_to_relay(&or_conn); + tt_int_op(can_connect, OP_EQ, 1); + + done: + ; +} + #define CONNECTION_TESTCASE(name, fork, setup) \ { #name, test_conn_##name, fork, &setup, NULL } @@ -792,6 +888,7 @@ struct testcase_t connection_tests[] = { CONNECTION_TESTCASE_ARG(download_status, TT_FORK, test_conn_download_status_st, FLAV_NS), //CONNECTION_TESTCASE(func_suffix, TT_FORK, setup_func_pair), + { "failed_orconn_tracker", test_failed_orconn_tracker, TT_FORK, NULL, NULL }, END_OF_TESTCASES };