From c210db0d41f4a47496e12c0af829f8ae0a5c2cd2 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 10 Dec 2009 11:57:30 -0500 Subject: [PATCH 01/10] Enhance pqueue so we can remove items from the middle. This changes the pqueue API by requiring an additional int in every structure that we store in a pqueue to hold the index of that structure within the heap. --- src/common/container.c | 100 ++++++++++++++++++++++++++++++--- src/common/container.h | 11 +++- src/or/dns.c | 10 +++- src/test/test_containers.c | 110 ++++++++++++++++++++++++++----------- 4 files changed, 186 insertions(+), 45 deletions(-) diff --git a/src/common/container.c b/src/common/container.c index 7690b4c0ba..8b3bbeac52 100644 --- a/src/common/container.c +++ b/src/common/container.c @@ -605,6 +605,38 @@ smartlist_uniq_strings(smartlist_t *sl) /* Heap-based priority queue implementation for O(lg N) insert and remove. * Recall that the heap property is that, for every index I, h[I] < * H[LEFT_CHILD[I]] and h[I] < H[RIGHT_CHILD[I]]. + * + * For us to remove items other than the topmost item, each item must store + * its own index within the heap. When calling the pqueue functions, tell + * them about the offset of the field that stores the index within the item. + * + * Example: + * + * typedef struct timer_t { + * struct timeval tv; + * int heap_index; + * } timer_t; + * + * static int compare(const void *p1, const void *p2) { + * const timer_t *t1 = p1, *t2 = p2; + * if (t1->tv.tv_sec < t2->tv.tv_sec) { + * return -1; + * } else if (t1->tv.tv_sec > t2->tv.tv_sec) { + * return 1; + * } else { + * return t1->tv.tv_usec - t2->tv_usec; + * } + * } + * + * void timer_heap_insert(smartlist_t *heap, timer_t *timer) { + * smartlist_pqueue_add(heap, compare, STRUCT_OFFSET(timer_t, heap_index), + * timer); + * } + * + * void timer_heap_pop(smartlist_t *heap) { + * return smartlist_pqueue_pop(heap, compare, + * STRUCT_OFFSET(timer_t, heap_index)); + * } */ /* For a 1-indexed array, we would use LEFT_CHILD[x] = 2*x and RIGHT_CHILD[x] @@ -616,12 +648,22 @@ smartlist_uniq_strings(smartlist_t *sl) #define RIGHT_CHILD(i) ( 2*(i) + 2 ) #define PARENT(i) ( ((i)-1) / 2 ) +#define IDXP(p) ((int*)STRUCT_VAR_P(p, idx_field_offset)) + +#define UPDATE_IDX(i) do { \ + void *updated = sl->list[i]; \ + *IDXP(updated) = i; \ + } while (0) + +#define IDX_OF_ITEM(p) (*IDXP(p)) + /** Helper. sl may have at most one violation of the heap property: * the item at idx may be greater than one or both of its children. * Restore the heap property. */ static INLINE void smartlist_heapify(smartlist_t *sl, int (*compare)(const void *a, const void *b), + int idx_field_offset, int idx) { while (1) { @@ -644,21 +686,28 @@ smartlist_heapify(smartlist_t *sl, void *tmp = sl->list[idx]; sl->list[idx] = sl->list[best_idx]; sl->list[best_idx] = tmp; + UPDATE_IDX(idx); + UPDATE_IDX(best_idx); idx = best_idx; } } } -/** Insert item into the heap stored in sl, where order - * is determined by compare. */ +/** Insert item into the heap stored in sl, where order is + * determined by compare and the offset of the item in the heap is + * stored in an int-typed field at position idx_field_offset within + * item. + */ void smartlist_pqueue_add(smartlist_t *sl, int (*compare)(const void *a, const void *b), + int idx_field_offset, void *item) { int idx; smartlist_add(sl,item); + UPDATE_IDX(sl->num_used-1); for (idx = sl->num_used - 1; idx; ) { int parent = PARENT(idx); @@ -666,6 +715,8 @@ smartlist_pqueue_add(smartlist_t *sl, void *tmp = sl->list[parent]; sl->list[parent] = sl->list[idx]; sl->list[idx] = tmp; + UPDATE_IDX(parent); + UPDATE_IDX(idx); idx = parent; } else { return; @@ -674,32 +725,63 @@ smartlist_pqueue_add(smartlist_t *sl, } /** Remove and return the top-priority item from the heap stored in sl, - * where order is determined by compare. sl must not be - * empty. */ + * where order is determined by compare and the item's position in is + * stored at position idx_field_offset within the item. sl must + * not be empty. */ void * smartlist_pqueue_pop(smartlist_t *sl, - int (*compare)(const void *a, const void *b)) + int (*compare)(const void *a, const void *b), + int idx_field_offset) { void *top; tor_assert(sl->num_used); top = sl->list[0]; + *IDXP(top)=-1; if (--sl->num_used) { sl->list[0] = sl->list[sl->num_used]; - smartlist_heapify(sl, compare, 0); + UPDATE_IDX(0); + smartlist_heapify(sl, compare, idx_field_offset, 0); } return top; } +/** Remove the item item from the heap stored in sl, + * where order is determined by compare and the item's position in is + * stored at position idx_field_offset within the item. sl must + * not be empty. */ +void +smartlist_pqueue_remove(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset, + void *item) +{ + int idx = IDX_OF_ITEM(item); + tor_assert(idx >= 0); + tor_assert(sl->list[idx] == item); + --sl->num_used; + *IDXP(item) = -1; + if (idx == sl->num_used) { + return; + } else { + sl->list[idx] = sl->list[sl->num_used]; + UPDATE_IDX(idx); + smartlist_heapify(sl, compare, idx_field_offset, idx); + } +} + /** Assert that the heap property is correctly maintained by the heap stored * in sl, where order is determined by compare. */ void smartlist_pqueue_assert_ok(smartlist_t *sl, - int (*compare)(const void *a, const void *b)) + int (*compare)(const void *a, const void *b), + int idx_field_offset) { int i; - for (i = sl->num_used - 1; i > 0; --i) { - tor_assert(compare(sl->list[PARENT(i)], sl->list[i]) <= 0); + for (i = sl->num_used - 1; i >= 0; --i) { + if (i>0) + tor_assert(compare(sl->list[PARENT(i)], sl->list[i]) <= 0); + tor_assert(IDX_OF_ITEM(sl->list[i]) == i); } } diff --git a/src/common/container.h b/src/common/container.h index 41c0c68705..8077d56ebd 100644 --- a/src/common/container.h +++ b/src/common/container.h @@ -118,11 +118,18 @@ int smartlist_bsearch_idx(const smartlist_t *sl, const void *key, void smartlist_pqueue_add(smartlist_t *sl, int (*compare)(const void *a, const void *b), + int idx_field_offset, void *item); void *smartlist_pqueue_pop(smartlist_t *sl, - int (*compare)(const void *a, const void *b)); + int (*compare)(const void *a, const void *b), + int idx_field_offset); +void smartlist_pqueue_remove(smartlist_t *sl, + int (*compare)(const void *a, const void *b), + int idx_field_offset, + void *item); void smartlist_pqueue_assert_ok(smartlist_t *sl, - int (*compare)(const void *a, const void *b)); + int (*compare)(const void *a, const void *b), + int idx_field_offset); #define SPLIT_SKIP_SPACE 0x01 #define SPLIT_IGNORE_BLANK 0x02 diff --git a/src/or/dns.c b/src/or/dns.c index 963039df44..9ecdf426e4 100644 --- a/src/or/dns.c +++ b/src/or/dns.c @@ -128,6 +128,8 @@ typedef struct cached_resolve_t { uint32_t ttl; /**< What TTL did the nameserver tell us? */ /** Connections that want to know when we get an answer for this resolve. */ pending_connection_t *pending_connections; + /** Position of this element in the heap*/ + int minheap_idx; } cached_resolve_t; static void purge_expired_resolves(time_t now); @@ -344,6 +346,7 @@ set_expiry(cached_resolve_t *resolve, time_t expires) resolve->expire = expires; smartlist_pqueue_add(cached_resolve_pqueue, _compare_cached_resolves_by_expiry, + STRUCT_OFFSET(cached_resolve_t, minheap_idx), resolve); } @@ -389,7 +392,8 @@ purge_expired_resolves(time_t now) if (resolve->expire > now) break; smartlist_pqueue_pop(cached_resolve_pqueue, - _compare_cached_resolves_by_expiry); + _compare_cached_resolves_by_expiry, + STRUCT_OFFSET(cached_resolve_t, minheap_idx)); if (resolve->state == CACHE_STATE_PENDING) { log_debug(LD_EXIT, @@ -751,6 +755,7 @@ dns_resolve_impl(edge_connection_t *exitconn, int is_resolve, resolve = tor_malloc_zero(sizeof(cached_resolve_t)); resolve->magic = CACHED_RESOLVE_MAGIC; resolve->state = CACHE_STATE_PENDING; + resolve->minheap_idx = -1; resolve->is_reverse = is_reverse; strlcpy(resolve->address, exitconn->_base.address, sizeof(resolve->address)); @@ -1734,7 +1739,8 @@ _assert_cache_ok(void) return; smartlist_pqueue_assert_ok(cached_resolve_pqueue, - _compare_cached_resolves_by_expiry); + _compare_cached_resolves_by_expiry, + STRUCT_OFFSET(cached_resolve_t, minheap_idx)); SMARTLIST_FOREACH(cached_resolve_pqueue, cached_resolve_t *, res, { diff --git a/src/test/test_containers.c b/src/test/test_containers.c index fc48ac966a..efc879b8c2 100644 --- a/src/test/test_containers.c +++ b/src/test/test_containers.c @@ -515,11 +515,17 @@ test_container_digestset(void) smartlist_free(included); } -/** Helper: return a tristate based on comparing two strings. */ +typedef struct pq_entry_t { + const char *val; + int idx; +} pq_entry_t; + +/** Helper: return a tristate based on comparing two pq_entry_t values. */ static int -_compare_strings_for_pqueue(const void *s1, const void *s2) +_compare_strings_for_pqueue(const void *p1, const void *p2) { - return strcmp((const char*)s1, (const char*)s2); + const pq_entry_t *e1=p1, *e2=p2; + return strcmp(e1->val, e2->val); } /** Run unit tests for heap-based priority queue functions. */ @@ -528,50 +534,90 @@ test_container_pqueue(void) { smartlist_t *sl = smartlist_create(); int (*cmp)(const void *, const void*); -#define OK() smartlist_pqueue_assert_ok(sl, cmp) + const int offset = STRUCT_OFFSET(pq_entry_t, idx); +#define ENTRY(s) pq_entry_t s = { #s, -1 } + ENTRY(cows); + ENTRY(zebras); + ENTRY(fish); + ENTRY(frogs); + ENTRY(apples); + ENTRY(squid); + ENTRY(daschunds); + ENTRY(eggplants); + ENTRY(weissbier); + ENTRY(lobsters); + ENTRY(roquefort); + ENTRY(chinchillas); + ENTRY(fireflies); + +#define OK() smartlist_pqueue_assert_ok(sl, cmp, offset) cmp = _compare_strings_for_pqueue; - - smartlist_pqueue_add(sl, cmp, (char*)"cows"); - smartlist_pqueue_add(sl, cmp, (char*)"zebras"); - smartlist_pqueue_add(sl, cmp, (char*)"fish"); - smartlist_pqueue_add(sl, cmp, (char*)"frogs"); - smartlist_pqueue_add(sl, cmp, (char*)"apples"); - smartlist_pqueue_add(sl, cmp, (char*)"squid"); - smartlist_pqueue_add(sl, cmp, (char*)"daschunds"); - smartlist_pqueue_add(sl, cmp, (char*)"eggplants"); - smartlist_pqueue_add(sl, cmp, (char*)"weissbier"); - smartlist_pqueue_add(sl, cmp, (char*)"lobsters"); - smartlist_pqueue_add(sl, cmp, (char*)"roquefort"); + smartlist_pqueue_add(sl, cmp, offset, &cows); + smartlist_pqueue_add(sl, cmp, offset, &zebras); + smartlist_pqueue_add(sl, cmp, offset, &fish); + smartlist_pqueue_add(sl, cmp, offset, &frogs); + smartlist_pqueue_add(sl, cmp, offset, &apples); + smartlist_pqueue_add(sl, cmp, offset, &squid); + smartlist_pqueue_add(sl, cmp, offset, &daschunds); + smartlist_pqueue_add(sl, cmp, offset, &eggplants); + smartlist_pqueue_add(sl, cmp, offset, &weissbier); + smartlist_pqueue_add(sl, cmp, offset, &lobsters); + smartlist_pqueue_add(sl, cmp, offset, &roquefort); OK(); test_eq(smartlist_len(sl), 11); - test_streq(smartlist_get(sl, 0), "apples"); - test_streq(smartlist_pqueue_pop(sl, cmp), "apples"); + test_eq_ptr(smartlist_get(sl, 0), &apples); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &apples); test_eq(smartlist_len(sl), 10); OK(); - test_streq(smartlist_pqueue_pop(sl, cmp), "cows"); - test_streq(smartlist_pqueue_pop(sl, cmp), "daschunds"); - smartlist_pqueue_add(sl, cmp, (char*)"chinchillas"); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &cows); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &daschunds); + smartlist_pqueue_add(sl, cmp, offset, &chinchillas); OK(); - smartlist_pqueue_add(sl, cmp, (char*)"fireflies"); + smartlist_pqueue_add(sl, cmp, offset, &fireflies); OK(); - test_streq(smartlist_pqueue_pop(sl, cmp), "chinchillas"); - test_streq(smartlist_pqueue_pop(sl, cmp), "eggplants"); - test_streq(smartlist_pqueue_pop(sl, cmp), "fireflies"); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &chinchillas); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &eggplants); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &fireflies); OK(); - test_streq(smartlist_pqueue_pop(sl, cmp), "fish"); - test_streq(smartlist_pqueue_pop(sl, cmp), "frogs"); - test_streq(smartlist_pqueue_pop(sl, cmp), "lobsters"); - test_streq(smartlist_pqueue_pop(sl, cmp), "roquefort"); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &fish); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &frogs); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &lobsters); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &roquefort); OK(); test_eq(smartlist_len(sl), 3); - test_streq(smartlist_pqueue_pop(sl, cmp), "squid"); - test_streq(smartlist_pqueue_pop(sl, cmp), "weissbier"); - test_streq(smartlist_pqueue_pop(sl, cmp), "zebras"); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &squid); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &weissbier); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &zebras); test_eq(smartlist_len(sl), 0); OK(); + + /* Now test remove. */ + smartlist_pqueue_add(sl, cmp, offset, &cows); + smartlist_pqueue_add(sl, cmp, offset, &fish); + smartlist_pqueue_add(sl, cmp, offset, &frogs); + smartlist_pqueue_add(sl, cmp, offset, &apples); + smartlist_pqueue_add(sl, cmp, offset, &squid); + smartlist_pqueue_add(sl, cmp, offset, &zebras); + test_eq(smartlist_len(sl), 6); + OK(); + smartlist_pqueue_remove(sl, cmp, offset, &zebras); + test_eq(smartlist_len(sl), 5); + OK(); + smartlist_pqueue_remove(sl, cmp, offset, &cows); + test_eq(smartlist_len(sl), 4); + OK(); + smartlist_pqueue_remove(sl, cmp, offset, &apples); + test_eq(smartlist_len(sl), 3); + OK(); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &fish); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &frogs); + test_eq_ptr(smartlist_pqueue_pop(sl, cmp, offset), &squid); + test_eq(smartlist_len(sl), 0); + OK(); + #undef OK done: From d3be00e0f454998db6387c8547d218a0db93db21 Mon Sep 17 00:00:00 2001 From: Can Tang Date: Thu, 10 Dec 2009 11:12:42 -0500 Subject: [PATCH 02/10] Favor quiet circuits when choosing which order to relay cells in. Each circuit is ranked in terms of how many cells from it have been relayed recently, using a time-weighted average. This patch has been tested this on a private Tor network on PlanetLab, and gotten improvements of 12-35% in time it takes to fetch a small web page while there's a simultaneous large data transfer going on simultaneously. [Commit msg by nickm based on mail from Ian Goldberg.] --- src/or/circuitlist.c | 12 +++++ src/or/config.c | 5 ++ src/or/connection.c | 28 ++++++++-- src/or/or.h | 32 ++++++++++++ src/or/relay.c | 120 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 191 insertions(+), 6 deletions(-) diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c index 2c949def00..eb8c90f460 100644 --- a/src/or/circuitlist.c +++ b/src/or/circuitlist.c @@ -385,6 +385,10 @@ init_circuit_base(circuit_t *circ) circ->package_window = circuit_initial_package_window(); circ->deliver_window = CIRCWINDOW_START; + /* Initialize the cell_ewma_t structure */ + circ->n_cell_ewma.last_cell_time = circ->highres_created; + circ->n_cell_ewma.cell_count = 0.0; + circuit_add(circ); } @@ -432,6 +436,14 @@ or_circuit_new(circid_t p_circ_id, or_connection_t *p_conn) init_circuit_base(TO_CIRCUIT(circ)); + /* Initialize the cell_ewma_t structure */ + + /* Fetch the timeval that init_circuit_base filled in. */ + circ->p_cell_ewma.last_cell_time = TO_CIRCUIT(circ)->highres_created; + + /* Initialize the cell counts to 0 */ + circ->p_cell_ewma.cell_count = 0.0; + return circ; } diff --git a/src/or/config.c b/src/or/config.c index deeda163b6..fe5fe9f7ee 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -355,6 +355,11 @@ static config_var_t _option_vars[] = { VAR("__HashedControlSessionPassword", LINELIST, HashedControlSessionPassword, NULL), V(MinUptimeHidServDirectoryV2, INTERVAL, "24 hours"), + + /* Options for EWMA selection of circuit to write from */ + VAR("EWMASignificance", DOUBLE, EWMASignificance, "-1.0"), + VAR("EWMAInterval", DOUBLE, EWMAInterval, "-1.0"), + { NULL, CONFIG_TYPE_OBSOLETE, 0, NULL } }; diff --git a/src/or/connection.c b/src/or/connection.c index 0ff1cc5876..bd12e36180 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -2275,8 +2275,8 @@ connection_read_bucket_should_increase(or_connection_t *conn) * Mark the connection and return -1 if you want to close it, else * return 0. */ -int -connection_handle_read(connection_t *conn) +static int +connection_handle_read_impl(connection_t *conn) { int max_to_read=-1, try_to_read; size_t before, n_read = 0; @@ -2371,6 +2371,17 @@ loop_again: return 0; } +int +connection_handle_read(connection_t *conn) +{ + int res; + + tor_gettimeofday_cache_clear(); + res = connection_handle_read_impl(conn); + return res; + +} + /** Pull in new bytes from conn-\>s or conn-\>linked_conn onto conn-\>inbuf, * either directly or via TLS. Reduce the token buckets by the number of bytes * read. @@ -2572,8 +2583,8 @@ connection_outbuf_too_full(connection_t *conn) * Mark the connection and return -1 if you want to close it, else * return 0. */ -int -connection_handle_write(connection_t *conn, int force) +static int +connection_handle_write_impl(connection_t *conn, int force) { int e; socklen_t len=(socklen_t)sizeof(e); @@ -2740,6 +2751,15 @@ connection_handle_write(connection_t *conn, int force) return 0; } +int +connection_handle_write(connection_t *conn, int force) +{ + int res; + tor_gettimeofday_cache_clear(); + res = connection_handle_write_impl(conn, force); + return res; +} + /** OpenSSL TLS record size is 16383; this is close. The goal here is to * push data out as soon as we know there's enough for a TLS record, so * during periods of high load we won't read entire megabytes from diff --git a/src/or/or.h b/src/or/or.h index 2e575f5ef9..003d0ebdad 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1992,6 +1992,20 @@ typedef struct { time_t expiry_time; } cpath_build_state_t; +/** + * The cell_ewma_t structure keeps track of how many cells a circuit has + * transferred recently. It keeps an EWMA (exponentially weighted + * moving average) of the number of cells flushed in + * connection_or_flush_from_first_active_circuit(). + */ + +typedef struct { + /** The last time a cell was flushed from this circuit. */ + struct timeval last_cell_time; + /** The EWMA of the cell count. */ + double cell_count; +} cell_ewma_t; + #define ORIGIN_CIRCUIT_MAGIC 0x35315243u #define OR_CIRCUIT_MAGIC 0x98ABC04Fu @@ -2081,6 +2095,10 @@ typedef struct circuit_t { /** Unique ID for measuring tunneled network status requests. */ uint64_t dirreq_id; + + /** The EWMA count for the number of cells flushed from the + * n_conn_cells queue. */ + cell_ewma_t n_cell_ewma; } circuit_t; /** Largest number of relay_early cells that we can send on a given @@ -2212,6 +2230,10 @@ typedef struct or_circuit_t { * exit-ward queues of this circuit; reset every time when writing * buffer stats to disk. */ uint64_t total_cell_waiting_time; + + /** The EWMA count for the number of cells flushed from the + * p_conn_cells queue. */ + cell_ewma_t p_cell_ewma; } or_circuit_t; /** Convert a circuit subtype to a circuit_t.*/ @@ -2740,6 +2762,14 @@ typedef struct { * to make this false. */ int ReloadTorrcOnSIGHUP; + /* The EWMA parameters for circuit selection within a connection. + * The most recent EWMAInterval seconds will account for an + * EWMASignificance (between 0 and 1) portion of the weight. + * If these values are negative, use the global defaults (soon to be + * set in the consensus). */ + double EWMASignificance; + double EWMAInterval; + } or_options_t; /** Persistent state for an onion router, as saved to disk. */ @@ -5122,5 +5152,7 @@ int rend_parse_introduction_points(rend_service_descriptor_t *parsed, size_t intro_points_encoded_size); int rend_parse_client_keys(strmap_t *parsed_clients, const char *str); +void tor_gettimeofday_cache_clear(void); + #endif diff --git a/src/or/relay.c b/src/or/relay.c index ac305ce3df..147412f596 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -10,6 +10,7 @@ * receiving from circuits, plus queuing on circuits. **/ +#include #include "or.h" #include "mempool.h" @@ -35,6 +36,26 @@ circuit_resume_edge_reading_helper(edge_connection_t *conn, static int circuit_consider_stop_edge_reading(circuit_t *circ, crypt_path_t *layer_hint); +/** Cache the current hi-res time; the cache gets reset when libevent + * calls us. */ + +static struct timeval cached_time_hires = {0, 0}; + +static void +tor_gettimeofday_cached(struct timeval *tv) +{ + if (cached_time_hires.tv_sec == 0) { + tor_gettimeofday(&cached_time_hires); + } + *tv = cached_time_hires; +} + +void +tor_gettimeofday_cache_clear(void) +{ + cached_time_hires.tv_sec = 0; +} + /** Stats: how many relay cells have originated at this hop, or have * been relayed onward (not recognized at this hop)? */ @@ -1633,7 +1654,7 @@ cell_queue_append_packed_copy(cell_queue_t *queue, const cell_t *cell) insertion_time_queue_t *it_queue = queue->insertion_times; if (!it_pool) it_pool = mp_pool_new(sizeof(insertion_time_elem_t), 1024); - tor_gettimeofday(&now); + tor_gettimeofday_cached(&now); #define SECONDS_IN_A_DAY 86400L added = (uint32_t)(((now.tv_sec % SECONDS_IN_A_DAY) * 100L) + ((uint32_t)now.tv_usec / (uint32_t)10000L)); @@ -1857,9 +1878,101 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, cell_queue_t *queue; circuit_t *circ; int streams_blocked; + + /* The current (hi-res) time */ + struct timeval now_hires; + + /* The EWMA cell counter for the circuit we're flushing. */ + cell_ewma_t *cell_ewma = NULL; + + /* The global cell EWMA parameters. The algorithm is parameterized by + * two values (type double): + * + * "significance" (between 0 and 1) and "interval" + * + * The cell count is weighted so that the most recent "interval" + * seconds will account for "significance" of the weight. + * + * If "interval" is set to 0, it disables the algorithm, and the old + * algorithm (round-robin) is used. + * + * These parameters should really be set by the consensus, but can be + * overridden by the torrc (in which case the options values will be + * >= 0.0). + */ + static double cell_ewma_significance = 0.9; + static double cell_ewma_interval = 10.0; + + double significance_override = get_options()->EWMASignificance; + double interval_override = get_options()->EWMAInterval; + if (significance_override >= 0.0) { + cell_ewma_significance = significance_override; + } + if (interval_override >= 0.0) { + cell_ewma_interval = interval_override; + } + circ = conn->active_circuits; if (!circ) return 0; assert_active_circuits_ok_paranoid(conn); + + /* See if we're doing the ewma circuit selection algorithm. */ + if (cell_ewma_interval > 0.0) { + /* Is there another circuit we might like better? */ + circuit_t *circ_iter, *circ_start; + circuit_t *circ_min_cell_count = NULL; + double min_cell_count = 0.0; + tor_gettimeofday_cached(&now_hires); + + /* Start with circ, and go around the circular linked list */ + circ_start = circ_iter = circ; + do { + double delta_t; + + /* Find the appropriate EWMA cell counter to use. */ + if (circ_iter->n_conn == conn) { + cell_ewma = &(circ_iter->n_cell_ewma); + } else { + cell_ewma = &(TO_OR_CIRCUIT(circ_iter)->p_cell_ewma); + } + + /* Update the EWMA cell counter to account for the passage of time. */ + delta_t = (double)(now_hires.tv_sec - + cell_ewma->last_cell_time.tv_sec); + delta_t += ((double)(now_hires.tv_usec - + cell_ewma->last_cell_time.tv_usec)) / 1000000.0; + + if (delta_t > 0.0) { + cell_ewma->cell_count *= + pow(cell_ewma_significance, delta_t / cell_ewma_interval); + //printf("cc: %f ", cell_ewma->cell_count); + } + cell_ewma->last_cell_time = now_hires; + + /* Now keep track of the lowest cell count we've seen. */ + if (circ_min_cell_count == NULL || + cell_ewma->cell_count < min_cell_count) { + min_cell_count = cell_ewma->cell_count; + circ_min_cell_count = circ_iter; + } + + circ_iter = *next_circ_on_conn_p(circ_iter, conn); + } while (circ_iter != circ_start); + + /* OK, we've gone all the way around. Let's use the circ with the + * lowest (recent) cell count. */ + circ = circ_min_cell_count; + + /* Now set the appropriate EWMA cell counter to use below to add the + * cells we actually send. */ + if (circ_min_cell_count->n_conn == conn) { + cell_ewma = &(circ_min_cell_count->n_cell_ewma); + } else { + cell_ewma = &(TO_OR_CIRCUIT(circ_min_cell_count)->p_cell_ewma); + } + } + + if (circ->n_conn == conn) { queue = &circ->n_conn_cells; streams_blocked = circ->streams_blocked_on_n_conn; @@ -1879,7 +1992,7 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, uint32_t flushed; uint32_t cell_waiting_time; insertion_time_queue_t *it_queue = queue->insertion_times; - tor_gettimeofday(&now); + tor_gettimeofday_cached(&now); flushed = (uint32_t)((now.tv_sec % SECONDS_IN_A_DAY) * 100L + (uint32_t)now.tv_usec / (uint32_t)10000L); if (!it_queue || !it_queue->first) { @@ -1915,6 +2028,9 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, packed_cell_free_unchecked(cell); ++n_flushed; + if (cell_ewma) { + cell_ewma->cell_count += 1.0; + } if (circ != conn->active_circuits) { /* If this happens, the current circuit just got made inactive by * a call in connection_write_to_buf(). That's nothing to worry about: From c43fee131d306507937733c7ddc45a040dd2d27c Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 10 Dec 2009 11:15:23 -0500 Subject: [PATCH 03/10] Adjust EWMA patch to conform to whitespace style. --- src/or/connection.c | 1 - src/or/relay.c | 17 ++++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/or/connection.c b/src/or/connection.c index bd12e36180..409884b920 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -2379,7 +2379,6 @@ connection_handle_read(connection_t *conn) tor_gettimeofday_cache_clear(); res = connection_handle_read_impl(conn); return res; - } /** Pull in new bytes from conn-\>s or conn-\>linked_conn onto conn-\>inbuf, diff --git a/src/or/relay.c b/src/or/relay.c index 147412f596..9b7396afac 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -1938,24 +1938,24 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, /* Update the EWMA cell counter to account for the passage of time. */ delta_t = (double)(now_hires.tv_sec - - cell_ewma->last_cell_time.tv_sec); + cell_ewma->last_cell_time.tv_sec); delta_t += ((double)(now_hires.tv_usec - - cell_ewma->last_cell_time.tv_usec)) / 1000000.0; + cell_ewma->last_cell_time.tv_usec)) / 1000000.0; if (delta_t > 0.0) { cell_ewma->cell_count *= - pow(cell_ewma_significance, delta_t / cell_ewma_interval); - //printf("cc: %f ", cell_ewma->cell_count); + pow(cell_ewma_significance, delta_t / cell_ewma_interval); + //printf("cc: %f ", cell_ewma->cell_count); } cell_ewma->last_cell_time = now_hires; /* Now keep track of the lowest cell count we've seen. */ if (circ_min_cell_count == NULL || - cell_ewma->cell_count < min_cell_count) { - min_cell_count = cell_ewma->cell_count; - circ_min_cell_count = circ_iter; + cell_ewma->cell_count < min_cell_count) { + min_cell_count = cell_ewma->cell_count; + circ_min_cell_count = circ_iter; } - + circ_iter = *next_circ_on_conn_p(circ_iter, conn); } while (circ_iter != circ_start); @@ -1971,7 +1971,6 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, cell_ewma = &(TO_OR_CIRCUIT(circ_min_cell_count)->p_cell_ewma); } } - if (circ->n_conn == conn) { queue = &circ->n_conn_cells; From 06e8370c33d6ccb73d55e9e8c3d2673c48d7b328 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Sat, 12 Dec 2009 00:49:48 -0500 Subject: [PATCH 04/10] Optimize cell-ewma circuit priority algorithm. There are two big changes here: - We store active circuits in a priority queue for each or_conn, rather than doing a linear search over all the active circuits before we send each cell. - Rather than multiplying every circuit's cell-ewma by a decay factor every time we send a cell (thus normalizing the value of a current cell to 1.0 and a past cell to alpha^t), we instead only scale down the cell-ewma every tick (ten seconds atm), normalizing so that a cell sent at the start of the tick has value 1.0). --- src/or/circuitlist.c | 12 +- src/or/config.c | 3 + src/or/connection.c | 4 + src/or/connection_or.c | 1 - src/or/or.h | 41 +++-- src/or/relay.c | 329 +++++++++++++++++++++++++++++++---------- 6 files changed, 297 insertions(+), 93 deletions(-) diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c index eb8c90f460..0dff873cf6 100644 --- a/src/or/circuitlist.c +++ b/src/or/circuitlist.c @@ -386,8 +386,10 @@ init_circuit_base(circuit_t *circ) circ->deliver_window = CIRCWINDOW_START; /* Initialize the cell_ewma_t structure */ - circ->n_cell_ewma.last_cell_time = circ->highres_created; + circ->n_cell_ewma.last_adjusted_tick = cell_ewma_get_tick(); circ->n_cell_ewma.cell_count = 0.0; + circ->n_cell_ewma.heap_index = -1; + circ->n_cell_ewma.is_for_p_conn = 0; circuit_add(circ); } @@ -438,11 +440,13 @@ or_circuit_new(circid_t p_circ_id, or_connection_t *p_conn) /* Initialize the cell_ewma_t structure */ - /* Fetch the timeval that init_circuit_base filled in. */ - circ->p_cell_ewma.last_cell_time = TO_CIRCUIT(circ)->highres_created; - /* Initialize the cell counts to 0 */ circ->p_cell_ewma.cell_count = 0.0; + circ->p_cell_ewma.last_adjusted_tick = cell_ewma_get_tick(); + circ->p_cell_ewma.is_for_p_conn = 1; + + /* It's not in any heap yet. */ + circ->p_cell_ewma.heap_index = -1; return circ; } diff --git a/src/or/config.c b/src/or/config.c index fe5fe9f7ee..a22ec4b13d 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -1379,6 +1379,9 @@ options_act(or_options_t *old_options) if (accounting_is_enabled(options)) configure_accounting(time(NULL)); + /* Change the cell EWMA settings */ + cell_ewma_set_scale_factor(options); + /* Check for transitions that need action. */ if (old_options) { if (options->UseEntryGuards && !old_options->UseEntryGuards) { diff --git a/src/or/connection.c b/src/or/connection.c index 409884b920..a95850b9e5 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -180,6 +180,9 @@ or_connection_new(int socket_family) or_conn->timestamp_last_added_nonpadding = time(NULL); or_conn->next_circ_id = crypto_rand_int(1<<15); + or_conn->active_circuit_pqueue = smartlist_create(); + or_conn->active_circuit_pqueue_last_recalibrated = cell_ewma_get_tick(); + return or_conn; } @@ -375,6 +378,7 @@ _connection_free(connection_t *conn) or_conn->tls = NULL; or_handshake_state_free(or_conn->handshake_state); or_conn->handshake_state = NULL; + smartlist_free(or_conn->active_circuit_pqueue); tor_free(or_conn->nickname); } if (CONN_IS_EDGE(conn)) { diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 0c32eeff0d..55e7956011 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -80,7 +80,6 @@ connection_or_clear_identity_map(void) } }); - digestmap_free(orconn_identity_map, NULL); orconn_identity_map = NULL; } diff --git a/src/or/or.h b/src/or/or.h index 003d0ebdad..8be84910f1 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1075,6 +1075,17 @@ typedef struct or_connection_t { * free up on this connection's outbuf. Every time we pull cells from a * circuit, we advance this pointer to the next circuit in the ring. */ struct circuit_t *active_circuits; + /** Priority queue of cell_ewma_t for circuits with queued cells waiting for + * room to free up on this connection's outbuf. Kept in heap order + * according to EWMA. + * + * This is redundant with active_circuits; if we ever decide only to use the + * cell_ewma algorithm for choosing circuits, we can remove active_circuits. + */ + smartlist_t *active_circuit_pqueue; + /** The tick on which the ciell_ewma_t's in active_circuit_pqueue last had + * their ewma values rescaled. */ + unsigned active_circuit_pqueue_last_recalibrated; struct or_connection_t *next_with_same_id; /**< Next connection with same * identity digest as this one. */ } or_connection_t; @@ -1994,16 +2005,25 @@ typedef struct { /** * The cell_ewma_t structure keeps track of how many cells a circuit has - * transferred recently. It keeps an EWMA (exponentially weighted - * moving average) of the number of cells flushed in - * connection_or_flush_from_first_active_circuit(). + * transferred recently. It keeps an EWMA (exponentially weighted moving + * average) of the number of cells flushed from the circuit queue onto a + * connection in connection_or_flush_from_first_active_circuit(). */ - typedef struct { - /** The last time a cell was flushed from this circuit. */ - struct timeval last_cell_time; - /** The EWMA of the cell count. */ - double cell_count; + /** The last 'tick' at which we recalibrated cell_count. + * + * A cell sent at exactly the start of this tick has weight 1.0. Cells sent + * since the start of this tick have weight greater than 1.0; ones sent + * earlier have less weight. */ + unsigned last_adjusted_tick; + /** The EWMA of the cell count. */ + double cell_count; + /** True iff this is a the cell count for a circuit's previous + * connection. */ + unsigned int is_for_p_conn : 1; + /** The position of the circuit within the or connection's priority + * queue. */ + int heap_index; } cell_ewma_t; #define ORIGIN_CIRCUIT_MAGIC 0x35315243u @@ -2097,7 +2117,8 @@ typedef struct circuit_t { uint64_t dirreq_id; /** The EWMA count for the number of cells flushed from the - * n_conn_cells queue. */ + * n_conn_cells queue. Used to determine which circuit to flush from next. + */ cell_ewma_t n_cell_ewma; } circuit_t; @@ -4469,6 +4490,8 @@ int append_address_to_payload(char *payload_out, const tor_addr_t *addr); const char *decode_address_from_payload(tor_addr_t *addr_out, const char *payload, int payload_len); +unsigned cell_ewma_get_tick(void); +void cell_ewma_set_scale_factor(or_options_t *options); /********************************* rephist.c ***************************/ diff --git a/src/or/relay.c b/src/or/relay.c index 9b7396afac..19bb6871ce 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -1752,6 +1752,194 @@ prev_circ_on_conn_p(circuit_t *circ, or_connection_t *conn) } } +/** Helper for sorting cell_ewma_t values in their priority queue. */ +static int +compare_cell_ewma_counts(const void *p1, const void *p2) +{ + const cell_ewma_t *e1=p1, *e2=p2; + if (e1->cell_count < e2->cell_count) + return -1; + else if (e1->cell_count > e2->cell_count) + return 1; + else + return 0; +} + +/** Given a cell_ewma_t, return a pointer to the circuit containing it. */ +static circuit_t * +cell_ewma_to_circuit(cell_ewma_t *ewma) +{ + if (ewma->is_for_p_conn) { + /* This is an or_circuit_t's p_cell_ewma. */ + or_circuit_t *orcirc = SUBTYPE_P(ewma, or_circuit_t, p_cell_ewma); + return TO_CIRCUIT(orcirc); + } else { + /* This is some circuit's n_cell_ewma. */ + return SUBTYPE_P(ewma, circuit_t, n_cell_ewma); + } +} + +/* ==== Functions for scaling cell_ewma_t ==== + + When choosing which cells to relay first, we favor circuits that have been + quiet recently. This gives better latency on connections that aren't + pushing lots of data, and makes the network feel more interactive. + + Conceptually, we take an exponentially weighted mean average of the number + of cells a circuit has sent, and allow active circuits (those with cells to + relay) to send cells in order of their exponentially-weighted mean average + (EWMA) cell count. [That is, a cell sent N seconds ago 'counts' F^N times + as much as a cell sent now, for 0tv_sec / EWMA_TICK_LEN; + /* rem */ + double rem = (now->tv_sec % EWMA_TICK_LEN) + + ((double)(now->tv_usec)) / 1.0e6; + *remainder_out = rem / EWMA_TICK_LEN; + return res; +} + +/** Compute and return the current cell_ewma tick. */ +unsigned +cell_ewma_get_tick(void) +{ + return ((unsigned)approx_time() / EWMA_TICK_LEN); +} + +/** The per-tick scale factor to be used when computing cell-count EWMA + * values. (A cell sent N ticks before the start of the current tick + * has value ewma_scale_factor ** N.) + * + * If ewma_scale_factor is <= 0, the EWMA algorithm is disabled. + */ +static double ewma_scale_factor = EWMA_DEFAULT_SCALE_FACTOR; + +/** Adjust the global cell scale factor based on options */ +void +cell_ewma_set_scale_factor(or_options_t *options) +{ + double f; + if (options->EWMAInterval > 0.0001) { + f = pow(options->EWMASignificance, + EWMA_TICK_LEN / options->EWMAInterval); + } else { + f = EWMA_DEFAULT_SCALE_FACTOR; + } + + ewma_scale_factor = f; +} + +/** Return the multiplier necessary to convert the value of a cell sent in + * 'from_tick' to one sent in 'to_tick'. */ +static INLINE double +get_scale_factor(unsigned from_tick, unsigned to_tick) +{ + /* This math can wrap around, but that's okay: unsigned overflow is + well-defined */ + int diff = (int)(to_tick - from_tick); + return pow(ewma_scale_factor, diff); +} + +/** Adjust the cell count of ewma so that it is scaled with respect to + * cur_tick */ +static void +scale_single_cell_ewma(cell_ewma_t *ewma, unsigned cur_tick) +{ + double factor = get_scale_factor(ewma->last_adjusted_tick, cur_tick); + ewma->cell_count *= factor; + ewma->last_adjusted_tick = cur_tick; +} + +/** Adjust the cell count of every active circuit on conn so + * that they are scaled with respect to cur_tick */ +static void +scale_active_circuits(or_connection_t *conn, unsigned cur_tick) +{ + + double factor = get_scale_factor( + conn->active_circuit_pqueue_last_recalibrated, + cur_tick); + /** Ordinarily it isn't okay to change the value of an element in a heap, + * but it's okay here, since we are preserving the order. */ + SMARTLIST_FOREACH(conn->active_circuit_pqueue, cell_ewma_t *, e, { + tor_assert(e->last_adjusted_tick == + conn->active_circuit_pqueue_last_recalibrated); + e->cell_count *= factor; + e->last_adjusted_tick = cur_tick; + }); + conn->active_circuit_pqueue_last_recalibrated = cur_tick; +} + +/** Rescale ewma to the same scale as conn, and add it to + * conn's priority queue of active circuits */ +static void +add_cell_ewma_to_conn(or_connection_t *conn, cell_ewma_t *ewma) +{ + tor_assert(ewma->heap_index == -1); + scale_single_cell_ewma(ewma, + conn->active_circuit_pqueue_last_recalibrated); + + smartlist_pqueue_add(conn->active_circuit_pqueue, + compare_cell_ewma_counts, + STRUCT_OFFSET(cell_ewma_t, heap_index), + ewma); +} + +/** Remove ewma from conn's priority queue of active circuits */ +static void +remove_cell_ewma_from_conn(or_connection_t *conn, cell_ewma_t *ewma) +{ + tor_assert(ewma->heap_index != -1); + smartlist_pqueue_remove(conn->active_circuit_pqueue, + compare_cell_ewma_counts, + STRUCT_OFFSET(cell_ewma_t, heap_index), + ewma); +} + +/** Remove and return the first cell_ewma_t from conn's priority queue of + * active circuits. Requires that the priority queue is nonempty. */ +static cell_ewma_t * +pop_first_cell_ewma_from_conn(or_connection_t *conn) +{ + return smartlist_pqueue_pop(conn->active_circuit_pqueue, + compare_cell_ewma_counts, + STRUCT_OFFSET(cell_ewma_t, heap_index)); +} + /** Add circ to the list of circuits with pending cells on * conn. No effect if circ is already linked. */ void @@ -1765,6 +1953,8 @@ make_circuit_active_on_conn(circuit_t *circ, or_connection_t *conn) return; } + assert_active_circuits_ok_paranoid(conn); + if (! conn->active_circuits) { conn->active_circuits = circ; *prevp = *nextp = circ; @@ -1776,6 +1966,15 @@ make_circuit_active_on_conn(circuit_t *circ, or_connection_t *conn) *prev_circ_on_conn_p(head, conn) = circ; *prevp = old_tail; } + + if (circ->n_conn == conn) { + add_cell_ewma_to_conn(conn, &circ->n_cell_ewma); + } else { + or_circuit_t *orcirc = TO_OR_CIRCUIT(circ); + tor_assert(conn == orcirc->p_conn); + add_cell_ewma_to_conn(conn, &orcirc->p_cell_ewma); + } + assert_active_circuits_ok_paranoid(conn); } @@ -1793,6 +1992,8 @@ make_circuit_inactive_on_conn(circuit_t *circ, or_connection_t *conn) return; } + assert_active_circuits_ok_paranoid(conn); + tor_assert(next && prev); tor_assert(*prev_circ_on_conn_p(next, conn) == circ); tor_assert(*next_circ_on_conn_p(prev, conn) == circ); @@ -1806,6 +2007,15 @@ make_circuit_inactive_on_conn(circuit_t *circ, or_connection_t *conn) conn->active_circuits = next; } *prevp = *nextp = NULL; + + if (circ->n_conn == conn) { + remove_cell_ewma_from_conn(conn, &circ->n_cell_ewma); + } else { + or_circuit_t *orcirc = TO_OR_CIRCUIT(circ); + tor_assert(conn == orcirc->p_conn); + remove_cell_ewma_from_conn(conn, &orcirc->p_cell_ewma); + } + assert_active_circuits_ok_paranoid(conn); } @@ -1825,6 +2035,10 @@ connection_or_unlink_all_active_circs(or_connection_t *orconn) cur = next; } while (cur != head); orconn->active_circuits = NULL; + + SMARTLIST_FOREACH(orconn->active_circuit_pqueue, cell_ewma_t *, e, + e->heap_index = -1); + smartlist_clear(orconn->active_circuit_pqueue); } /** Block (if block is true) or unblock (if block is false) @@ -1884,92 +2098,27 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, /* The EWMA cell counter for the circuit we're flushing. */ cell_ewma_t *cell_ewma = NULL; - - /* The global cell EWMA parameters. The algorithm is parameterized by - * two values (type double): - * - * "significance" (between 0 and 1) and "interval" - * - * The cell count is weighted so that the most recent "interval" - * seconds will account for "significance" of the weight. - * - * If "interval" is set to 0, it disables the algorithm, and the old - * algorithm (round-robin) is used. - * - * These parameters should really be set by the consensus, but can be - * overridden by the torrc (in which case the options values will be - * >= 0.0). - */ - static double cell_ewma_significance = 0.9; - static double cell_ewma_interval = 10.0; - - double significance_override = get_options()->EWMASignificance; - double interval_override = get_options()->EWMAInterval; - if (significance_override >= 0.0) { - cell_ewma_significance = significance_override; - } - if (interval_override >= 0.0) { - cell_ewma_interval = interval_override; - } + double ewma_increment = -1; circ = conn->active_circuits; if (!circ) return 0; assert_active_circuits_ok_paranoid(conn); /* See if we're doing the ewma circuit selection algorithm. */ - if (cell_ewma_interval > 0.0) { - /* Is there another circuit we might like better? */ - circuit_t *circ_iter, *circ_start; - circuit_t *circ_min_cell_count = NULL; - double min_cell_count = 0.0; + if (ewma_scale_factor > 0.0) { + unsigned tick; + double fractional_tick; tor_gettimeofday_cached(&now_hires); + tick = cell_ewma_tick_from_timeval(&now_hires, &fractional_tick); - /* Start with circ, and go around the circular linked list */ - circ_start = circ_iter = circ; - do { - double delta_t; - - /* Find the appropriate EWMA cell counter to use. */ - if (circ_iter->n_conn == conn) { - cell_ewma = &(circ_iter->n_cell_ewma); - } else { - cell_ewma = &(TO_OR_CIRCUIT(circ_iter)->p_cell_ewma); - } - - /* Update the EWMA cell counter to account for the passage of time. */ - delta_t = (double)(now_hires.tv_sec - - cell_ewma->last_cell_time.tv_sec); - delta_t += ((double)(now_hires.tv_usec - - cell_ewma->last_cell_time.tv_usec)) / 1000000.0; - - if (delta_t > 0.0) { - cell_ewma->cell_count *= - pow(cell_ewma_significance, delta_t / cell_ewma_interval); - //printf("cc: %f ", cell_ewma->cell_count); - } - cell_ewma->last_cell_time = now_hires; - - /* Now keep track of the lowest cell count we've seen. */ - if (circ_min_cell_count == NULL || - cell_ewma->cell_count < min_cell_count) { - min_cell_count = cell_ewma->cell_count; - circ_min_cell_count = circ_iter; - } - - circ_iter = *next_circ_on_conn_p(circ_iter, conn); - } while (circ_iter != circ_start); - - /* OK, we've gone all the way around. Let's use the circ with the - * lowest (recent) cell count. */ - circ = circ_min_cell_count; - - /* Now set the appropriate EWMA cell counter to use below to add the - * cells we actually send. */ - if (circ_min_cell_count->n_conn == conn) { - cell_ewma = &(circ_min_cell_count->n_cell_ewma); - } else { - cell_ewma = &(TO_OR_CIRCUIT(circ_min_cell_count)->p_cell_ewma); + if (tick != conn->active_circuit_pqueue_last_recalibrated) { + scale_active_circuits(conn, tick); } + + ewma_increment = pow(ewma_scale_factor, -fractional_tick); + + cell_ewma = smartlist_get(conn->active_circuit_pqueue, 0); + circ = cell_ewma_to_circuit(cell_ewma); } if (circ->n_conn == conn) { @@ -2028,7 +2177,14 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, packed_cell_free_unchecked(cell); ++n_flushed; if (cell_ewma) { - cell_ewma->cell_count += 1.0; + cell_ewma_t *tmp; + cell_ewma->cell_count += ewma_increment; + /* We pop and re-add the cell_ewma_t here, not above, since we need to + * re-add it immediately to keep the priority queue consistent with + * the linked-list implementation */ + tmp = pop_first_cell_ewma_from_conn(conn); + tor_assert(tmp == cell_ewma); + add_cell_ewma_to_conn(conn, cell_ewma); } if (circ != conn->active_circuits) { /* If this happens, the current circuit just got made inactive by @@ -2049,7 +2205,7 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, if (streams_blocked && queue->n <= CELL_QUEUE_LOWWATER_SIZE) set_streams_blocked_on_circ(circ, conn, 0); /* unblock streams */ - /* Did we just ran out of cells on this queue? */ + /* Did we just ran out of cells on this circuit's queue? */ if (queue->n == 0) { log_debug(LD_GENERAL, "Made a circuit inactive."); make_circuit_inactive_on_conn(circ, conn); @@ -2172,16 +2328,31 @@ assert_active_circuits_ok(or_connection_t *orconn) { circuit_t *head = orconn->active_circuits; circuit_t *cur = head; + int n = 0; if (! head) return; do { circuit_t *next = *next_circ_on_conn_p(cur, orconn); circuit_t *prev = *prev_circ_on_conn_p(cur, orconn); + cell_ewma_t *ewma; tor_assert(next); tor_assert(prev); tor_assert(*next_circ_on_conn_p(prev, orconn) == cur); tor_assert(*prev_circ_on_conn_p(next, orconn) == cur); + if (orconn == cur->n_conn) { + ewma = &cur->n_cell_ewma; + tor_assert(!ewma->is_for_p_conn); + } else { + ewma = &TO_OR_CIRCUIT(cur)->p_cell_ewma; + tor_assert(ewma->is_for_p_conn); + } + tor_assert(ewma->heap_index != -1); + tor_assert(ewma == smartlist_get(orconn->active_circuit_pqueue, + ewma->heap_index)); + n++; cur = next; } while (cur != head); + + tor_assert(n == smartlist_len(orconn->active_circuit_pqueue)); } From 27b7746c519125cac703f38a8b325d3a1c08619b Mon Sep 17 00:00:00 2001 From: Sebastian Hahn Date: Mon, 14 Dec 2009 04:59:55 +0100 Subject: [PATCH 05/10] Fix Snow Leopard compile and a codestyle violation When calculating the current tick, cap (tv_sec / EWMA_TICK_LEN) to an unsigned int. --- src/or/relay.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/or/relay.c b/src/or/relay.c index 19bb6871ce..b7e8871766 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -1817,7 +1817,7 @@ cell_ewma_to_circuit(cell_ewma_t *ewma) #define EWMA_DEFAULT_SCALE_FACTOR 0.9 /** Given a timeval 'now', compute the cell_ewma tick in which it occurs - * and the fraction of the tick that has elapsed before + * and the fraction of the tick that has elapsed before * * These tick values are not meant to be shared between Tor instances, or used * for other purposes. */ @@ -1825,7 +1825,7 @@ static unsigned cell_ewma_tick_from_timeval(const struct timeval *now, double *remainder_out) { - unsigned res = now->tv_sec / EWMA_TICK_LEN; + unsigned res = (unsigned) (now->tv_sec / EWMA_TICK_LEN); /* rem */ double rem = (now->tv_sec % EWMA_TICK_LEN) + ((double)(now->tv_usec)) / 1.0e6; From 2c672f73bfdcb4cf44a0dbcb4706b804d2273379 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Tue, 15 Dec 2009 13:20:02 -0500 Subject: [PATCH 06/10] Fix comment typos in container.c --- src/common/container.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/container.c b/src/common/container.c index 8b3bbeac52..f452a51e42 100644 --- a/src/common/container.c +++ b/src/common/container.c @@ -725,7 +725,7 @@ smartlist_pqueue_add(smartlist_t *sl, } /** Remove and return the top-priority item from the heap stored in sl, - * where order is determined by compare and the item's position in is + * where order is determined by compare and the item's position is * stored at position idx_field_offset within the item. sl must * not be empty. */ void * @@ -747,7 +747,7 @@ smartlist_pqueue_pop(smartlist_t *sl, } /** Remove the item item from the heap stored in sl, - * where order is determined by compare and the item's position in is + * where order is determined by compare and the item's position is * stored at position idx_field_offset within the item. sl must * not be empty. */ void From 1292a9ddfeef5d9560f4493d6767a10b181f031e Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Tue, 15 Dec 2009 13:20:22 -0500 Subject: [PATCH 07/10] Fix various comment typos in ewma patch; found by arma. --- src/or/or.h | 4 ++-- src/or/relay.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/or/or.h b/src/or/or.h index 8be84910f1..fd913ce4ef 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1083,7 +1083,7 @@ typedef struct or_connection_t { * cell_ewma algorithm for choosing circuits, we can remove active_circuits. */ smartlist_t *active_circuit_pqueue; - /** The tick on which the ciell_ewma_t's in active_circuit_pqueue last had + /** The tick on which the cell_ewma_ts in active_circuit_pqueue last had * their ewma values rescaled. */ unsigned active_circuit_pqueue_last_recalibrated; struct or_connection_t *next_with_same_id; /**< Next connection with same @@ -2018,7 +2018,7 @@ typedef struct { unsigned last_adjusted_tick; /** The EWMA of the cell count. */ double cell_count; - /** True iff this is a the cell count for a circuit's previous + /** True iff this is the cell count for a circuit's previous * connection. */ unsigned int is_for_p_conn : 1; /** The position of the circuit within the or connection's priority diff --git a/src/or/relay.c b/src/or/relay.c index 19bb6871ce..4a27df6efe 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -1787,9 +1787,10 @@ cell_ewma_to_circuit(cell_ewma_t *ewma) Conceptually, we take an exponentially weighted mean average of the number of cells a circuit has sent, and allow active circuits (those with cells to - relay) to send cells in order of their exponentially-weighted mean average - (EWMA) cell count. [That is, a cell sent N seconds ago 'counts' F^N times - as much as a cell sent now, for 0now, compute the cell_ewma tick in which it occurs + * and the fraction of the tick that has elapsed between the start of the tick + * and now. Return the former and store the latter in + * *remainder_out. * * These tick values are not meant to be shared between Tor instances, or used * for other purposes. */ @@ -2205,7 +2208,7 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, if (streams_blocked && queue->n <= CELL_QUEUE_LOWWATER_SIZE) set_streams_blocked_on_circ(circ, conn, 0); /* unblock streams */ - /* Did we just ran out of cells on this circuit's queue? */ + /* Did we just run out of cells on this circuit's queue? */ if (queue->n == 0) { log_debug(LD_GENERAL, "Made a circuit inactive."); make_circuit_inactive_on_conn(circ, conn); From 60b01c6d5e7afbb4eee6f7e57583fae4a7278d58 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Tue, 15 Dec 2009 13:53:53 -0500 Subject: [PATCH 08/10] Change interface for configuring cell ewma algorithm. The rule is now: take the value from the CircuitPriorityHalflife config option if it is set. If it zero, disable the cell_ewma algorithm. If it is set, use it to calculate the scaling factor. If it is not set, look for a CircPriorityHalflifeMsec parameter in the consensus networkstatus. If *that* is zero, then disable the cell_ewma algorithm; if it is set, use it to calculate the scaling factor. If it is not set at all, disable the algorithm. --- src/or/config.c | 8 +++---- src/or/networkstatus.c | 1 + src/or/or.h | 24 ++++++++++++------- src/or/relay.c | 53 +++++++++++++++++++++++++++++++----------- 4 files changed, 60 insertions(+), 26 deletions(-) diff --git a/src/or/config.c b/src/or/config.c index a22ec4b13d..bb762ee311 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -167,6 +167,7 @@ static config_var_t _option_vars[] = { V(CircuitBuildTimeout, INTERVAL, "0"), V(CircuitIdleTimeout, INTERVAL, "1 hour"), V(CircuitStreamTimeout, INTERVAL, "0"), + V(CircuitPriorityHalflife, DOUBLE, "-100.0"), /*negative:'Use default'*/ V(ClientDNSRejectInternalAddresses, BOOL,"1"), V(ClientOnly, BOOL, "0"), V(ConsensusParams, STRING, NULL), @@ -212,6 +213,7 @@ static config_var_t _option_vars[] = { V(ExitPolicyRejectPrivate, BOOL, "1"), V(ExitPortStatistics, BOOL, "0"), V(ExtraInfoStatistics, BOOL, "0"), + V(FallbackNetworkstatusFile, FILENAME, SHARE_DATADIR PATH_SEPARATOR "tor" PATH_SEPARATOR "fallback-consensus"), V(FascistFirewall, BOOL, "0"), @@ -356,10 +358,6 @@ static config_var_t _option_vars[] = { NULL), V(MinUptimeHidServDirectoryV2, INTERVAL, "24 hours"), - /* Options for EWMA selection of circuit to write from */ - VAR("EWMASignificance", DOUBLE, EWMASignificance, "-1.0"), - VAR("EWMAInterval", DOUBLE, EWMAInterval, "-1.0"), - { NULL, CONFIG_TYPE_OBSOLETE, 0, NULL } }; @@ -1380,7 +1378,7 @@ options_act(or_options_t *old_options) configure_accounting(time(NULL)); /* Change the cell EWMA settings */ - cell_ewma_set_scale_factor(options); + cell_ewma_set_scale_factor(options, networkstatus_get_latest_consensus()); /* Check for transitions that need action. */ if (old_options) { diff --git a/src/or/networkstatus.c b/src/or/networkstatus.c index f3925f1b89..bebe87353b 100644 --- a/src/or/networkstatus.c +++ b/src/or/networkstatus.c @@ -1679,6 +1679,7 @@ networkstatus_set_current_consensus(const char *consensus, update_consensus_networkstatus_fetch_time(now); dirvote_recalculate_timing(get_options(), now); routerstatus_list_update_named_server_map(); + cell_ewma_set_scale_factor(get_options(), current_consensus); } if (!from_cache) { diff --git a/src/or/or.h b/src/or/or.h index fd913ce4ef..745a0ce919 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2783,13 +2783,20 @@ typedef struct { * to make this false. */ int ReloadTorrcOnSIGHUP; - /* The EWMA parameters for circuit selection within a connection. - * The most recent EWMAInterval seconds will account for an - * EWMASignificance (between 0 and 1) portion of the weight. - * If these values are negative, use the global defaults (soon to be - * set in the consensus). */ - double EWMASignificance; - double EWMAInterval; + /* The main parameter for picking circuits within a connection. + * + * If this value is positive, when picking a cell to relay on a connection, + * we always relay from the circuit whose weighted cell count is lowest. + * Cells are weighted exponentially such that if one cell is sent + * 'CircuitPriorityHalflife' seconds before another, it counts for half as + * much. + * + * If this value is zero, we're disabling the cell-EWMA algorithm. + * + * If this value is negative, we're using the default approach + * according to either Tor or a parameter set in the consensus. + */ + double CircuitPriorityHalflife; } or_options_t; @@ -4491,7 +4498,8 @@ const char *decode_address_from_payload(tor_addr_t *addr_out, const char *payload, int payload_len); unsigned cell_ewma_get_tick(void); -void cell_ewma_set_scale_factor(or_options_t *options); +void cell_ewma_set_scale_factor(or_options_t *options, + networkstatus_t *consensus); /********************************* rephist.c ***************************/ diff --git a/src/or/relay.c b/src/or/relay.c index 1be09e1df0..ae1b062cf6 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -1814,8 +1814,8 @@ cell_ewma_to_circuit(cell_ewma_t *ewma) #define EWMA_TICK_LEN 10 /** The default per-tick scale factor, if it hasn't been overridden by a - * consensus or a configuration setting. */ -#define EWMA_DEFAULT_SCALE_FACTOR 0.9 + * consensus or a configuration setting. zero means "disabled". */ +#define EWMA_DEFAULT_HALFLIFE 0.0 /** Given a timeval now, compute the cell_ewma tick in which it occurs * and the fraction of the tick that has elapsed between the start of the tick @@ -1846,24 +1846,51 @@ cell_ewma_get_tick(void) /** The per-tick scale factor to be used when computing cell-count EWMA * values. (A cell sent N ticks before the start of the current tick * has value ewma_scale_factor ** N.) - * - * If ewma_scale_factor is <= 0, the EWMA algorithm is disabled. */ -static double ewma_scale_factor = EWMA_DEFAULT_SCALE_FACTOR; +static double ewma_scale_factor = 0.1; +static int ewma_enabled = 0; + +#define EPSILON 0.00001 +#define LOG_ONEHALF -0.69314718055994529 /** Adjust the global cell scale factor based on options */ void -cell_ewma_set_scale_factor(or_options_t *options) +cell_ewma_set_scale_factor(or_options_t *options, networkstatus_t *consensus) { - double f; - if (options->EWMAInterval > 0.0001) { - f = pow(options->EWMASignificance, - EWMA_TICK_LEN / options->EWMAInterval); + int32_t halflife_ms; + double halflife; + const char *source; + if (options && options->CircuitPriorityHalflife >= -EPSILON) { + halflife = options->CircuitPriorityHalflife; + source = "CircuitPriorityHalflife in configuration"; + } else if (consensus && + (halflife_ms = networkstatus_get_param( + consensus, "CircPriorityHalflifeMsec", -1) >= 0)) { + halflife = ((double)halflife_ms)/1000.0; + source = "CircPriorityHalflifeMsec in consensus"; } else { - f = EWMA_DEFAULT_SCALE_FACTOR; + halflife = EWMA_DEFAULT_HALFLIFE; + source = "Default value"; } - ewma_scale_factor = f; + if (halflife <= EPSILON) { + /* The cell EWMA algorithm is disabled. */ + ewma_scale_factor = 0.1; + ewma_enabled = 0; + log_info(LD_OR, + "Disabled cell_ewma algorithm because of value in %s", + source); + } else { + /* convert halflife into halflife-per-tick. */ + halflife /= EWMA_TICK_LEN; + /* compute per-tick scale factor. */ + ewma_scale_factor = exp( LOG_ONEHALF / halflife ); + ewma_enabled = 1; + log_info(LD_OR, + "Enabled cell_ewma algorithm because of value in %s; " + "scale factor is %lf per %d seconds", + source, ewma_scale_factor, EWMA_TICK_LEN); + } } /** Return the multiplier necessary to convert the value of a cell sent in @@ -2108,7 +2135,7 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, assert_active_circuits_ok_paranoid(conn); /* See if we're doing the ewma circuit selection algorithm. */ - if (ewma_scale_factor > 0.0) { + if (ewma_enabled) { unsigned tick; double fractional_tick; tor_gettimeofday_cached(&now_hires); From 7ab824c8cf2cdb683253870ff2f54bbfd12223d4 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Tue, 15 Dec 2009 14:04:30 -0500 Subject: [PATCH 09/10] Document CircuitPriorityHalflife on the manpage --- doc/tor.1.in | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/tor.1.in b/doc/tor.1.in index 4bcab6d04d..721bbaa05e 100644 --- a/doc/tor.1.in +++ b/doc/tor.1.in @@ -419,6 +419,19 @@ ORPort. (Default: 1) \fBPreferTunneledDirConns \fR\fB0\fR|\fB1\fP If non-zero, we will avoid directory servers that don't support tunneled directory connections, when possible. (Default: 1) +.LP +.TP +\fBCircuitPriorityHalflife \fR\fBNUM\fB1\fP +If this value is set, we override the default algorithm for choosing which +circuit's cells to deliver or relay first. When the value is 0, we +round-robin between the active circuits on a connection, delivering one cell +from each in turn. When the value is positive, we prefer delivering cells +from whichever connection has the lowest weighted cell count, where cells are +weighted exponentially according to the supplied CircuitPrioirityHalflife +value (in seconds). If this option is not set at all, we use the behavior +recommended in the current consensus networkstatus. +This is an advanced option; you generally shouldn't have mess with it. +(Default: not set.) .SH CLIENT OPTIONS .PP From 5e2eaa37f1984e3de122568062d364283402bdf3 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 18 Dec 2009 22:32:56 -0500 Subject: [PATCH 10/10] Add a changelog entry for the circuit priority logic --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 77d6f16ec4..f8590345b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,14 @@ Changes in version 0.2.2.7-alpha - 2009-??-?? + o Major features: + - When choosing which cells to relay first, we can now favor circuits + that have been quiet recently, so as to get lower latency for + low-volume circuits. By default, relays enable or disable this + feature based on a setting in the consensus. Preliminary testing + suggests that this should make the network feel faster as more relays + use it. You can override this default by using the new + "CircuitPriorityHalflife" config option. Design and code by Ian + Goldberg, Can Tang, and Chris Alexander. + o Minor features: - New config option "CircuitStreamTimeout" to override our internal timeout schedule for how many seconds until we detach a stream from