From e106812a778f53760c819ab20a214ac3222b3b15 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 9 Aug 2012 12:21:37 -0400 Subject: [PATCH 1/8] Change smartlist_choose_node_by_bandwidth to avoid double This should make our preferred solution to #6538 easier to implement, avoid a bunch of potential nastiness with excessive int-vs-double math, and generally make the code there a little less scary. "But wait!" you say. "Is it really safe to do this? Won't the results come out differently?" Yes, but not much. We now round every weighted bandwidth to the nearest byte before computing on it. This will make every node that had a fractional part of its weighted bandwidth before either slighty more likely or slightly less likely. Further, the rand_bw value was only ever set with integer precision, so it can't accurately sample routers with tiny fractional bandwidth values anyway. Finally, doing repeated double-vs-uint64 comparisons is just plain sad; it will involve an implicit cast to double, which is never a fun thing. --- changes/bug6538 | 4 ++++ configure.in | 1 + src/common/util.c | 15 +++++++++++++++ src/common/util.h | 1 + src/or/routerlist.c | 46 ++++++++++++++++++++++++++------------------- 5 files changed, 48 insertions(+), 19 deletions(-) create mode 100644 changes/bug6538 diff --git a/changes/bug6538 b/changes/bug6538 new file mode 100644 index 0000000000..1e882eb1cc --- /dev/null +++ b/changes/bug6538 @@ -0,0 +1,4 @@ + o Minor bugfixes: + - Switch weighted node selection rule from using a list of doubles + to using a list of int64_t. This should make the process slightly + easier to debug and maintain. Needed for fix for bug 6538. diff --git a/configure.in b/configure.in index 1db7d08e20..f5f8eacb7e 100644 --- a/configure.in +++ b/configure.in @@ -300,6 +300,7 @@ AC_CHECK_FUNCS( gmtime_r \ inet_aton \ ioctl \ + llround \ localtime_r \ lround \ memmem \ diff --git a/src/common/util.c b/src/common/util.c index a0dff2e35b..60222f4083 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -378,6 +378,21 @@ tor_lround(double d) #endif } +/** Return the 64-bit integer closest to d. We define this wrapper here so + * that not all users of math.h need to use the right incancations to get the + * c99 functions. */ +int64_t +tor_llround(double d) +{ +#if defined(HAVE_LLROUND) + return (int64_t)llround(d); +#elif defined(HAVE_RINT) + return (int64_t)rint(d); +#else + return (int64_t)(d > 0 ? d + 0.5 : ceil(d - 0.5)); +#endif +} + /** Returns floor(log2(u64)). If u64 is 0, (incorrectly) returns 0. */ int tor_log2(uint64_t u64) diff --git a/src/common/util.h b/src/common/util.h index a2ab0ccac8..f700e9f4cb 100644 --- a/src/common/util.h +++ b/src/common/util.h @@ -161,6 +161,7 @@ void tor_log_mallinfo(int severity); /* Math functions */ double tor_mathlog(double d) ATTR_CONST; long tor_lround(double d) ATTR_CONST; +int64_t tor_llround(double d) ATTR_CONST; int tor_log2(uint64_t u64) ATTR_CONST; uint64_t round_to_power_of_2(uint64_t u64); unsigned round_to_next_multiple_of(unsigned number, unsigned divisor); diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 4979b933ad..382d45746c 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -1702,12 +1702,12 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, bandwidth_weight_rule_t rule) { int64_t weight_scale; - int64_t rand_bw; + uint64_t rand_bw; double Wg = -1, Wm = -1, We = -1, Wd = -1; double Wgb = -1, Wmb = -1, Web = -1, Wdb = -1; - double weighted_bw = 0, unweighted_bw = 0; - double *bandwidths; - double tmp = 0; + uint64_t weighted_bw = 0, unweighted_bw = 0; + uint64_t *bandwidths; + uint64_t tmp; unsigned int i; unsigned int i_chosen; unsigned int i_has_been_chosen; @@ -1792,7 +1792,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, Web /= weight_scale; Wdb /= weight_scale; - bandwidths = tor_malloc_zero(sizeof(double)*smartlist_len(sl)); + bandwidths = tor_malloc_zero(sizeof(uint64_t)*smartlist_len(sl)); // Cycle through smartlist and total the bandwidth. SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) { @@ -1831,24 +1831,30 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, } else { // middle weight = (is_dir ? Wmb*Wm : Wm); } + /* These should be impossible; but overflows here would be bad, so let's + * make sure. */ + if (this_bw < 0) + this_bw = 0; + if (weight < 0.0) + weight = 0.0; - bandwidths[node_sl_idx] = weight*this_bw; - weighted_bw += weight*this_bw; + bandwidths[node_sl_idx] = tor_llround(weight*this_bw + 0.5); + weighted_bw += bandwidths[node_sl_idx]; unweighted_bw += this_bw; if (is_me) - sl_last_weighted_bw_of_me = weight*this_bw; + sl_last_weighted_bw_of_me = bandwidths[node_sl_idx]; } SMARTLIST_FOREACH_END(node); /* XXXX this is a kludge to expose these values. */ sl_last_total_weighted_bw = weighted_bw; log_debug(LD_CIRC, "Choosing node for rule %s based on weights " - "Wg=%f Wm=%f We=%f Wd=%f with total bw %f", + "Wg=%f Wm=%f We=%f Wd=%f with total bw "U64_FORMAT, bandwidth_weight_rule_to_string(rule), - Wg, Wm, We, Wd, weighted_bw); + Wg, Wm, We, Wd, U64_PRINTF_ARG(weighted_bw)); /* If there is no bandwidth, choose at random */ - if (DBL_TO_U64(weighted_bw) == 0) { + if (weighted_bw == 0) { /* Don't warn when using bridges/relays not in the consensus */ if (!have_unknown) { #define ZERO_BANDWIDTH_WARNING_INTERVAL (15) @@ -1858,24 +1864,25 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, if ((msg = rate_limit_log(&zero_bandwidth_warning_limit, approx_time()))) { log_warn(LD_CIRC, - "Weighted bandwidth is %f in node selection for rule %s " - "(unweighted was %f) %s", - weighted_bw, bandwidth_weight_rule_to_string(rule), - unweighted_bw, msg); + "Weighted bandwidth is "U64_FORMAT" in node selection for " + "rule %s (unweighted was "U64_FORMAT") %s", + U64_PRINTF_ARG(weighted_bw), + bandwidth_weight_rule_to_string(rule), + U64_PRINTF_ARG(unweighted_bw), msg); } } tor_free(bandwidths); return smartlist_choose(sl); } - rand_bw = crypto_rand_uint64(DBL_TO_U64(weighted_bw)); + rand_bw = crypto_rand_uint64(weighted_bw); rand_bw++; /* crypto_rand_uint64() counts from 0, and we need to count * from 1 below. See bug 1203 for details. */ /* Last, count through sl until we get to the element we picked */ i_chosen = (unsigned)smartlist_len(sl); i_has_been_chosen = 0; - tmp = 0.0; + tmp = 0; for (i=0; i < (unsigned)smartlist_len(sl); i++) { tmp += bandwidths[i]; if (tmp >= rand_bw && !i_has_been_chosen) { @@ -1892,8 +1899,9 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, --i; log_warn(LD_BUG, "Round-off error in computing bandwidth had an effect on " " which router we chose. Please tell the developers. " - "%f " U64_FORMAT " %f", tmp, U64_PRINTF_ARG(rand_bw), - weighted_bw); + U64_FORMAT" "U64_FORMAT" "U64_FORMAT, + U64_PRINTF_ARG(tmp), U64_PRINTF_ARG(rand_bw), + U64_PRINTF_ARG(weighted_bw)); } tor_free(bandwidths); return smartlist_get(sl, i); From 640a51684ce5a6cdae5c5f92cd2f932922380c00 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 9 Aug 2012 12:37:40 -0400 Subject: [PATCH 2/8] Remove remaining timing-dependency in choosing nodes by bandwidth The old approach, because of its "tmp >= rand_bw && !i_has_been_chosen" check, would run through the second part of the loop slightly slower than the first part. Now, we remove i_has_been_chosen, and instead set rand_bw = UINT64_MAX, so that every instance of the loop will do exactly the same amount of work regardless of the initial value of rand_bw. Fix for bug 6538. --- changes/bug6538 | 8 ++++++++ src/or/routerlist.c | 12 ++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/changes/bug6538 b/changes/bug6538 index 1e882eb1cc..fc9e583d52 100644 --- a/changes/bug6538 +++ b/changes/bug6538 @@ -2,3 +2,11 @@ - Switch weighted node selection rule from using a list of doubles to using a list of int64_t. This should make the process slightly easier to debug and maintain. Needed for fix for bug 6538. + + o Security features: + - Switch to a completely time-invariant approach for picking nodes + weighted by bandwidth. Our old approach would run through the + part of the loop after it had made its choice slightly slower + than it ran through the part of the loop before it had made its + choice. Fix for bug 6538. + diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 382d45746c..75cb3f2e69 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -1710,7 +1710,6 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, uint64_t tmp; unsigned int i; unsigned int i_chosen; - unsigned int i_has_been_chosen; int have_unknown = 0; /* true iff sl contains element not in consensus. */ /* Can't choose exit and guard at same time */ @@ -1881,13 +1880,12 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, /* Last, count through sl until we get to the element we picked */ i_chosen = (unsigned)smartlist_len(sl); - i_has_been_chosen = 0; tmp = 0; for (i=0; i < (unsigned)smartlist_len(sl); i++) { tmp += bandwidths[i]; - if (tmp >= rand_bw && !i_has_been_chosen) { + if (tmp >= rand_bw) { i_chosen = i; - i_has_been_chosen = 1; + rand_bw = UINT64_MAX; } } i = i_chosen; @@ -1926,7 +1924,6 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, { unsigned int i; unsigned int i_chosen; - unsigned int i_has_been_chosen; int32_t *bandwidths; int is_exit; int is_guard; @@ -2127,7 +2124,6 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, /* Last, count through sl until we get to the element we picked */ tmp = 0; i_chosen = (unsigned)smartlist_len(sl); - i_has_been_chosen = 0; for (i=0; i < (unsigned)smartlist_len(sl); i++) { is_exit = bitarray_is_set(exit_bits, i); is_guard = bitarray_is_set(guard_bits, i); @@ -2142,9 +2138,9 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, else tmp += bandwidths[i]; - if (tmp >= rand_bw && !i_has_been_chosen) { + if (tmp >= rand_bw) { i_chosen = i; - i_has_been_chosen = 1; + rand_bw = UINT64_MAX; } } i = i_chosen; From 50aecc68ca33f53f7de086990d5b1856c4d40ab8 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 9 Aug 2012 12:41:28 -0400 Subject: [PATCH 3/8] Use a smarter fix for bug 1203. Previously, we had incremented rand_bw so that when we later tested "tmp >= rand_bw", we wouldn't have an off-by-one error. But instead, it makes more sense to leave rand_bw alone and test "tmp > rand_bw". Note that this is still safe. To take the example from the bug1203 writeup: Suppose that we have 3 nodes with bandwidth 1. So the bandwidth array is { 1, 1, 1 }, and the total bandwidth is 3. We choose rand_bw == 0, 1, or 2. With the first iteration of the loop, tmp is now 1; with the second, tmp is 2; with the third, tmp is 3. Now that our check is tmp > rand_bw, we will set i in the first iteration of the loop iff rand_bw == 0; in the second iteration of the loop iff rand_bw == 1, and in the third iff rand_bw == 2. That's what we want. Incidentally, this change makes the bug 6538 fix more ironclad: once rand_bw is set to UINT64_MAX, tmp > rand_bw is obviously false regardless of the value of tmp. --- src/or/routerlist.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 75cb3f2e69..67ad2dfe1c 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -1875,15 +1875,13 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, } rand_bw = crypto_rand_uint64(weighted_bw); - rand_bw++; /* crypto_rand_uint64() counts from 0, and we need to count - * from 1 below. See bug 1203 for details. */ /* Last, count through sl until we get to the element we picked */ i_chosen = (unsigned)smartlist_len(sl); tmp = 0; for (i=0; i < (unsigned)smartlist_len(sl); i++) { tmp += bandwidths[i]; - if (tmp >= rand_bw) { + if (tmp > rand_bw) { i_chosen = i; rand_bw = UINT64_MAX; } @@ -2118,8 +2116,6 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, /* Almost done: choose a random value from the bandwidth weights. */ rand_bw = crypto_rand_uint64(total_bw); - rand_bw++; /* crypto_rand_uint64() counts from 0, and we need to count - * from 1 below. See bug 1203 for details. */ /* Last, count through sl until we get to the element we picked */ tmp = 0; @@ -2138,7 +2134,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, else tmp += bandwidths[i]; - if (tmp >= rand_bw) { + if (tmp > rand_bw) { i_chosen = i; rand_bw = UINT64_MAX; } From 9bfb274abb9f9e5d445a75f0b67b433be823a730 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 9 Aug 2012 12:59:04 -0400 Subject: [PATCH 4/8] Refactor smartlist_choose_node_by_bandwidth to be less horrible. With this patch, I dump the old kludge of using magic negative numbers to indicate unknown bandwidths. I also compute each node's weighted bandwidth exactly once, rather than computing it once in a loop to compute the total weighted bandwidth and a second time in a loop to find which one we picked. --- src/or/routerlist.c | 69 ++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 67ad2dfe1c..801c4965ea 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -1922,15 +1922,17 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, { unsigned int i; unsigned int i_chosen; - int32_t *bandwidths; + uint64_t *bandwidths; int is_exit; int is_guard; + int is_fast; uint64_t total_nonexit_bw = 0, total_exit_bw = 0, total_bw = 0; uint64_t total_nonguard_bw = 0, total_guard_bw = 0; uint64_t rand_bw, tmp; double exit_weight; double guard_weight; int n_unknown = 0; + bitarray_t *fast_bits; bitarray_t *exit_bits; bitarray_t *guard_bits; int me_idx = -1; @@ -1954,10 +1956,9 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, } /* First count the total bandwidth weight, and make a list - * of each value. <0 means "unknown; no routerinfo." We use the - * bits of negative values to remember whether the router was fast (-x)&1 - * and whether it was an exit (-x)&2 or guard (-x)&4. Yes, it's a hack. */ - bandwidths = tor_malloc(sizeof(int32_t)*smartlist_len(sl)); + * of each value. We use UINT64_MAX to indicate "unknown". */ + bandwidths = tor_malloc_zero(sizeof(uint64_t)*smartlist_len(sl)); + fast_bits = bitarray_init_zero(smartlist_len(sl)); exit_bits = bitarray_init_zero(smartlist_len(sl)); guard_bits = bitarray_init_zero(smartlist_len(sl)); @@ -1965,7 +1966,6 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) { /* first, learn what bandwidth we think i has */ int is_known = 1; - int32_t flags = 0; uint32_t this_bw = 0; i = node_sl_idx; @@ -1978,12 +1978,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, if (node->rs->has_bandwidth) { this_bw = kb_to_bytes(node->rs->bandwidth); } else { /* guess */ - /* XXX024 once consensuses always list bandwidths, we can take - * this guessing business out. -RD */ is_known = 0; - flags = node->rs->is_fast ? 1 : 0; - flags |= is_exit ? 2 : 0; - flags |= is_guard ? 4 : 0; } } else if (node->ri) { /* Must be a bridge if we're willing to use it */ @@ -1994,12 +1989,11 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, bitarray_set(exit_bits, i); if (is_guard) bitarray_set(guard_bits, i); + if (node->is_fast) + bitarray_set(fast_bits, i); + if (is_known) { - bandwidths[i] = (int32_t) this_bw; - /* Casting this_bw to int32_t is safe because both kb_to_bytes - and bridge_get_advertised_bandwidth_bounded limit it to below - INT32_MAX. */ - tor_assert(bandwidths[i] >= 0); + bandwidths[i] = this_bw; if (is_guard) total_guard_bw += this_bw; else @@ -2010,7 +2004,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, total_nonexit_bw += this_bw; } else { ++n_unknown; - bandwidths[node_sl_idx] = -flags; + bandwidths[i] = UINT64_MAX; } } SMARTLIST_FOREACH_END(node); @@ -2028,12 +2022,12 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, avg_slow = 20000; } for (i=0; i<(unsigned)smartlist_len(sl); ++i) { - int32_t bw = bandwidths[i]; - if (bw>=0) + if (bandwidths[i] != UINT64_MAX) continue; - is_exit = ((-bw)&2); - is_guard = ((-bw)&4); - bandwidths[i] = ((-bw)&1) ? avg_fast : avg_slow; + is_fast = bitarray_is_set(fast_bits, i); + is_exit = bitarray_is_set(exit_bits, i); + is_guard = bitarray_is_set(guard_bits, i); + bandwidths[i] = is_fast ? avg_fast : avg_slow; if (is_exit) total_exit_bw += bandwidths[i]; else @@ -2048,6 +2042,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, /* If there's no bandwidth at all, pick at random. */ if (!(total_exit_bw+total_nonexit_bw)) { tor_free(bandwidths); + tor_free(fast_bits); tor_free(exit_bits); tor_free(guard_bits); return smartlist_choose(sl); @@ -2081,20 +2076,20 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, total_bw = 0; sl_last_weighted_bw_of_me = 0; for (i=0; i < (unsigned)smartlist_len(sl); i++) { - uint64_t bw; + tor_assert(bandwidths[i] < UINT64_MAX); + is_exit = bitarray_is_set(exit_bits, i); is_guard = bitarray_is_set(guard_bits, i); if (is_exit && is_guard) - bw = ((uint64_t)(bandwidths[i] * exit_weight * guard_weight)); + bandwidths[i] = tor_llround(bandwidths[i] * exit_weight * guard_weight); else if (is_guard) - bw = ((uint64_t)(bandwidths[i] * guard_weight)); + bandwidths[i] = tor_llround(bandwidths[i] * guard_weight); else if (is_exit) - bw = ((uint64_t)(bandwidths[i] * exit_weight)); - else - bw = bandwidths[i]; - total_bw += bw; + bandwidths[i] = tor_llround(bandwidths[i] * exit_weight); + + total_bw += bandwidths[i]; if (i == (unsigned) me_idx) - sl_last_weighted_bw_of_me = bw; + sl_last_weighted_bw_of_me = bandwidths[i]; } } @@ -2121,18 +2116,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, tmp = 0; i_chosen = (unsigned)smartlist_len(sl); for (i=0; i < (unsigned)smartlist_len(sl); i++) { - is_exit = bitarray_is_set(exit_bits, i); - is_guard = bitarray_is_set(guard_bits, i); - - /* Weights can be 0 if not counting guards/exits */ - if (is_exit && is_guard) - tmp += ((uint64_t)(bandwidths[i] * exit_weight * guard_weight)); - else if (is_guard) - tmp += ((uint64_t)(bandwidths[i] * guard_weight)); - else if (is_exit) - tmp += ((uint64_t)(bandwidths[i] * exit_weight)); - else - tmp += bandwidths[i]; + tmp += bandwidths[i]; if (tmp > rand_bw) { i_chosen = i; @@ -2151,6 +2135,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, U64_PRINTF_ARG(rand_bw), U64_PRINTF_ARG(total_bw)); } tor_free(bandwidths); + tor_free(fast_bits); tor_free(exit_bits); tor_free(guard_bits); return smartlist_get(sl, i); From 07df4dd52d3ab2eea2e8a8fc3222a5d297d077de Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 9 Aug 2012 13:47:42 -0400 Subject: [PATCH 5/8] Refactor the core of choosing by weights into a function This eliminates duplicated code, and lets us test a hairy piece of functionality. --- changes/bug6538 | 4 ++ src/or/routerlist.c | 162 ++++++++++++++++++-------------------------- src/or/routerlist.h | 5 ++ src/test/test.h | 4 ++ src/test/test_dir.c | 81 ++++++++++++++++++++++ 5 files changed, 160 insertions(+), 96 deletions(-) diff --git a/changes/bug6538 b/changes/bug6538 index fc9e583d52..03c168b60a 100644 --- a/changes/bug6538 +++ b/changes/bug6538 @@ -10,3 +10,7 @@ than it ran through the part of the loop before it had made its choice. Fix for bug 6538. + o Code simplifications and refactoring: + - Move the core of our "choose a weighted element at random" logic + into its own function, and give it unit tests. Now the logic is + testable, and a little less fragile too. diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 801c4965ea..1c0aca8ad1 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -11,6 +11,7 @@ * servers. **/ +#define ROUTERLIST_PRIVATE #include "or.h" #include "circuitbuild.h" #include "config.h" @@ -1652,6 +1653,53 @@ router_get_advertised_bandwidth_capped(const routerinfo_t *router) return result; } +/** Pick a random element of n_entries-element array entries, + * choosing each element with a probability proportional to its value, and + * return the index of that element. If all elements are 0, choose an index + * at random. If total_out is provided, set it to the sum of all + * elements in the array. Return -1 on error. + */ +/* private */ int +choose_array_element_by_weight(const uint64_t *entries, int n_entries, + uint64_t *total_out) +{ + int i, i_chosen=-1, n_chosen=0; + uint64_t total_so_far = 0; + uint64_t rand_val; + uint64_t total = 0; + + for (i = 0; i < n_entries; ++i) + total += entries[i]; + + if (total_out) + *total_out = total; + + if (n_entries < 1) + return -1; + + if (total == 0) + return crypto_rand_int(n_entries); + + rand_val = crypto_rand_uint64(total); + + for (i = 0; i < n_entries; ++i) { + total_so_far += entries[i]; + if (total_so_far > rand_val) { + i_chosen = i; + n_chosen++; + /* Set rand_val to UINT_MAX rather than stopping the loop. This way, + * the time we spend in the loop does not leak which element we chose. */ + rand_val = UINT64_MAX; + } + } + tor_assert(total_so_far == total); + tor_assert(n_chosen == 1); + tor_assert(i_chosen >= 0); + tor_assert(i_chosen < n_entries); + + return i_chosen; +} + /** When weighting bridges, enforce these values as lower and upper * bound for believable bandwidth, because there is no way for us * to verify a bridge's bandwidth currently. */ @@ -1702,15 +1750,10 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, bandwidth_weight_rule_t rule) { int64_t weight_scale; - uint64_t rand_bw; double Wg = -1, Wm = -1, We = -1, Wd = -1; double Wgb = -1, Wmb = -1, Web = -1, Wdb = -1; - uint64_t weighted_bw = 0, unweighted_bw = 0; + uint64_t weighted_bw = 0; uint64_t *bandwidths; - uint64_t tmp; - unsigned int i; - unsigned int i_chosen; - int have_unknown = 0; /* true iff sl contains element not in consensus. */ /* Can't choose exit and guard at same time */ tor_assert(rule == NO_WEIGHTING || @@ -1814,7 +1857,6 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, } else if (node->ri) { /* bridge or other descriptor not in our consensus */ this_bw = bridge_get_advertised_bandwidth_bounded(node->ri); - have_unknown = 1; } else { /* We can't use this one. */ continue; @@ -1838,69 +1880,22 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, weight = 0.0; bandwidths[node_sl_idx] = tor_llround(weight*this_bw + 0.5); - weighted_bw += bandwidths[node_sl_idx]; - unweighted_bw += this_bw; if (is_me) sl_last_weighted_bw_of_me = bandwidths[node_sl_idx]; } SMARTLIST_FOREACH_END(node); - /* XXXX this is a kludge to expose these values. */ - sl_last_total_weighted_bw = weighted_bw; - log_debug(LD_CIRC, "Choosing node for rule %s based on weights " "Wg=%f Wm=%f We=%f Wd=%f with total bw "U64_FORMAT, bandwidth_weight_rule_to_string(rule), Wg, Wm, We, Wd, U64_PRINTF_ARG(weighted_bw)); - /* If there is no bandwidth, choose at random */ - if (weighted_bw == 0) { - /* Don't warn when using bridges/relays not in the consensus */ - if (!have_unknown) { -#define ZERO_BANDWIDTH_WARNING_INTERVAL (15) - static ratelim_t zero_bandwidth_warning_limit = - RATELIM_INIT(ZERO_BANDWIDTH_WARNING_INTERVAL); - char *msg; - if ((msg = rate_limit_log(&zero_bandwidth_warning_limit, - approx_time()))) { - log_warn(LD_CIRC, - "Weighted bandwidth is "U64_FORMAT" in node selection for " - "rule %s (unweighted was "U64_FORMAT") %s", - U64_PRINTF_ARG(weighted_bw), - bandwidth_weight_rule_to_string(rule), - U64_PRINTF_ARG(unweighted_bw), msg); - } - } + { + int idx = choose_array_element_by_weight(bandwidths, + smartlist_len(sl), + &sl_last_total_weighted_bw); tor_free(bandwidths); - return smartlist_choose(sl); + return idx < 0 ? NULL : smartlist_get(sl, idx); } - - rand_bw = crypto_rand_uint64(weighted_bw); - - /* Last, count through sl until we get to the element we picked */ - i_chosen = (unsigned)smartlist_len(sl); - tmp = 0; - for (i=0; i < (unsigned)smartlist_len(sl); i++) { - tmp += bandwidths[i]; - if (tmp > rand_bw) { - i_chosen = i; - rand_bw = UINT64_MAX; - } - } - i = i_chosen; - - if (i == (unsigned)smartlist_len(sl)) { - /* This was once possible due to round-off error, but shouldn't be able - * to occur any longer. */ - tor_fragile_assert(); - --i; - log_warn(LD_BUG, "Round-off error in computing bandwidth had an effect on " - " which router we chose. Please tell the developers. " - U64_FORMAT" "U64_FORMAT" "U64_FORMAT, - U64_PRINTF_ARG(tmp), U64_PRINTF_ARG(rand_bw), - U64_PRINTF_ARG(weighted_bw)); - } - tor_free(bandwidths); - return smartlist_get(sl, i); } /** Helper function: @@ -1921,14 +1916,12 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule) { unsigned int i; - unsigned int i_chosen; uint64_t *bandwidths; int is_exit; int is_guard; int is_fast; - uint64_t total_nonexit_bw = 0, total_exit_bw = 0, total_bw = 0; + uint64_t total_nonexit_bw = 0, total_exit_bw = 0; uint64_t total_nonguard_bw = 0, total_guard_bw = 0; - uint64_t rand_bw, tmp; double exit_weight; double guard_weight; int n_unknown = 0; @@ -2073,7 +2066,6 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, if (guard_weight <= 0.0) guard_weight = 0.0; - total_bw = 0; sl_last_weighted_bw_of_me = 0; for (i=0; i < (unsigned)smartlist_len(sl); i++) { tor_assert(bandwidths[i] < UINT64_MAX); @@ -2087,15 +2079,12 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, else if (is_exit) bandwidths[i] = tor_llround(bandwidths[i] * exit_weight); - total_bw += bandwidths[i]; if (i == (unsigned) me_idx) sl_last_weighted_bw_of_me = bandwidths[i]; } } - /* XXXX this is a kludge to expose these values. */ - sl_last_total_weighted_bw = total_bw; - +#if 0 log_debug(LD_CIRC, "Total weighted bw = "U64_FORMAT ", exit bw = "U64_FORMAT ", nonexit bw = "U64_FORMAT", exit weight = %f " @@ -2108,37 +2097,18 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, exit_weight, (int)(rule == WEIGHT_FOR_EXIT), U64_PRINTF_ARG(total_guard_bw), U64_PRINTF_ARG(total_nonguard_bw), guard_weight, (int)(rule == WEIGHT_FOR_GUARD)); +#endif - /* Almost done: choose a random value from the bandwidth weights. */ - rand_bw = crypto_rand_uint64(total_bw); - - /* Last, count through sl until we get to the element we picked */ - tmp = 0; - i_chosen = (unsigned)smartlist_len(sl); - for (i=0; i < (unsigned)smartlist_len(sl); i++) { - tmp += bandwidths[i]; - - if (tmp > rand_bw) { - i_chosen = i; - rand_bw = UINT64_MAX; - } + { + int idx = choose_array_element_by_weight(bandwidths, + smartlist_len(sl), + &sl_last_total_weighted_bw); + tor_free(bandwidths); + tor_free(fast_bits); + tor_free(exit_bits); + tor_free(guard_bits); + return idx < 0 ? NULL : smartlist_get(sl, idx); } - i = i_chosen; - if (i == (unsigned)smartlist_len(sl)) { - /* This was once possible due to round-off error, but shouldn't be able - * to occur any longer. */ - tor_fragile_assert(); - --i; - log_warn(LD_BUG, "Round-off error in computing bandwidth had an effect on " - " which router we chose. Please tell the developers. " - U64_FORMAT " " U64_FORMAT " " U64_FORMAT, U64_PRINTF_ARG(tmp), - U64_PRINTF_ARG(rand_bw), U64_PRINTF_ARG(total_bw)); - } - tor_free(bandwidths); - tor_free(fast_bits); - tor_free(exit_bits); - tor_free(guard_bits); - return smartlist_get(sl, i); } /** Choose a random element of status list sl, weighted by diff --git a/src/or/routerlist.h b/src/or/routerlist.h index 8dcc6eb026..0b9b297514 100644 --- a/src/or/routerlist.h +++ b/src/or/routerlist.h @@ -216,5 +216,10 @@ int hex_digest_nickname_decode(const char *hexdigest, char *nickname_qualifier_out, char *nickname_out); +#ifdef ROUTERLIST_PRIVATE +int choose_array_element_by_weight(const uint64_t *entries, int n_entries, + uint64_t *total_out); +#endif + #endif diff --git a/src/test/test.h b/src/test/test.h index 0b6e6c60cb..6dcb9490bd 100644 --- a/src/test/test.h +++ b/src/test/test.h @@ -65,6 +65,10 @@ #define test_memeq_hex(expr1, hex) test_mem_op_hex(expr1, ==, hex) +#define tt_double_op(a,op,b) \ + tt_assert_test_type(a,b,#a" "#op" "#b,double,(val1_ op val2_),"%f", \ + TT_EXIT_TEST_FUNCTION) + const char *get_fname(const char *name); crypto_pk_t *pk_generate(int idx); diff --git a/src/test/test_dir.c b/src/test/test_dir.c index 83c612045b..ed0c5a1afb 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -7,6 +7,7 @@ #define DIRSERV_PRIVATE #define DIRVOTE_PRIVATE #define ROUTER_PRIVATE +#define ROUTERLIST_PRIVATE #define HIBERNATE_PRIVATE #include "or.h" #include "directory.h" @@ -1381,6 +1382,85 @@ test_dir_v3_networkstatus(void) ns_detached_signatures_free(dsig2); } +static void +test_dir_random_weighted(void *testdata) +{ + int histogram[10]; + uint64_t vals[10] = {3,1,2,4,6,0,7,5,8,9}, total=0; + uint64_t zeros[5] = {0,0,0,0,0}; + int i, choice; + const int n = 50000; + double max_sq_error; + (void) testdata; + + /* Try a ten-element array with values from 0 through 10. The values are + * in a scrambled order to make sure we don't depend on order. */ + memset(histogram,0,sizeof(histogram)); + for (i=0; i<10; ++i) + total += vals[i]; + tt_int_op(total, ==, 45); + for (i=0; i=, 0); + tt_int_op(choice, <, 10); + histogram[choice]++; + } + + /* Now see if we chose things about frequently enough. */ + max_sq_error = 0; + for (i=0; i<10; ++i) { + int expected = (int)(n*vals[i]/total); + double frac_diff = 0, sq; + TT_BLATHER((" %d : %5d vs %5d\n", (int)vals[i], histogram[i], expected)); + if (expected) + frac_diff = (histogram[i] - expected) / ((double)expected); + else + tt_int_op(histogram[i], ==, 0); + + sq = frac_diff * frac_diff; + if (sq > max_sq_error) + max_sq_error = sq; + } + /* It should almost always be much much less than this. If you want to + * figure out the odds, please feel free. */ + tt_double_op(max_sq_error, <, .05); + + /* Now try a singleton; do we choose it? */ + for (i = 0; i < 100; ++i) { + choice = choose_array_element_by_weight(vals, 1, NULL); + tt_int_op(choice, ==, 0); + } + + /* Now try an array of zeros. We should choose randomly. */ + memset(histogram,0,sizeof(histogram)); + for (i = 0; i < n; ++i) { + uint64_t t; + choice = choose_array_element_by_weight(zeros, 5, &t); + tt_int_op(t, ==, 0); + tt_int_op(choice, >=, 0); + tt_int_op(choice, <, 5); + histogram[choice]++; + } + /* Now see if we chose things about frequently enough. */ + max_sq_error = 0; + for (i=0; i<5; ++i) { + int expected = n/5; + double frac_diff = 0, sq; + TT_BLATHER((" %d : %5d vs %5d\n", (int)vals[i], histogram[i], expected)); + frac_diff = (histogram[i] - expected) / ((double)expected); + sq = frac_diff * frac_diff; + if (sq > max_sq_error) + max_sq_error = sq; + } + /* It should almost always be much much less than this. If you want to + * figure out the odds, please feel free. */ + tt_double_op(max_sq_error, <, .05); + done: + ; +} + #define DIR_LEGACY(name) \ { #name, legacy_test_helper, TT_FORK, &legacy_setup, test_dir_ ## name } @@ -1396,6 +1476,7 @@ struct testcase_t dir_tests[] = { DIR_LEGACY(measured_bw), DIR_LEGACY(param_voting), DIR_LEGACY(v3_networkstatus), + DIR(random_weighted), END_OF_TESTCASES }; From ef628649c85c9a996b22dfbad494f1757b091d45 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 27 Aug 2012 16:44:54 -0400 Subject: [PATCH 6/8] Spelling fix in util.c comments --- src/common/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/util.c b/src/common/util.c index 60222f4083..cea63e5c02 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -364,7 +364,7 @@ tor_mathlog(double d) } /** Return the long integer closest to d. We define this wrapper here so - * that not all users of math.h need to use the right incancations to get + * that not all users of math.h need to use the right intancations to get * the c99 functions. */ long tor_lround(double d) @@ -379,7 +379,7 @@ tor_lround(double d) } /** Return the 64-bit integer closest to d. We define this wrapper here so - * that not all users of math.h need to use the right incancations to get the + * that not all users of math.h need to use the right incantations to get the * c99 functions. */ int64_t tor_llround(double d) From 5c3199cda72fbdcf8f801219a0f9932673801da5 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 27 Aug 2012 17:37:56 -0400 Subject: [PATCH 7/8] In choose-by-bw, scale to better use the range of uint64 The smart part of this is based on an approach and a suggestion by rransom. The unsmart part is my own fault. --- src/or/routerlist.c | 109 ++++++++++++++++++++++++++++---------------- src/or/routerlist.h | 13 +++++- src/test/test_dir.c | 60 ++++++++++++++++++++---- 3 files changed, 131 insertions(+), 51 deletions(-) diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 1c0aca8ad1..185abf53bc 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -1653,15 +1653,41 @@ router_get_advertised_bandwidth_capped(const routerinfo_t *router) return result; } +/** Given an array of double/uint64_t unions that are currently being used as + * doubles, convert them to uint64_t, and try to scale them linearly so as to + * much of the range of uint64_t. If total_out is provided, set it to + * the sum of all elements in the array _before_ scaling. */ +/* private */ void +scale_array_elements_to_u64(u64_dbl_t *entries, int n_entries, + uint64_t *total_out) +{ + double total = 0.0; + double scale_factor; + int i; + /* big, but far away from overflowing an int64_t */ +#define SCALE_TO_U64_MAX (INT64_MAX / 4) + + for (i = 0; i < n_entries; ++i) + total += entries[i].dbl; + + scale_factor = SCALE_TO_U64_MAX / total; + + for (i = 0; i < n_entries; ++i) + entries[i].u64 = tor_llround(entries[i].dbl * scale_factor); + + if (total_out) + *total_out = (uint64_t) total; + +#undef SCALE_TO_U64_MAX +} + /** Pick a random element of n_entries-element array entries, - * choosing each element with a probability proportional to its value, and - * return the index of that element. If all elements are 0, choose an index - * at random. If total_out is provided, set it to the sum of all - * elements in the array. Return -1 on error. + * choosing each element with a probability proportional to its (uint64_t) + * value, and return the index of that element. If all elements are 0, choose + * an index at random. Return -1 on error. */ /* private */ int -choose_array_element_by_weight(const uint64_t *entries, int n_entries, - uint64_t *total_out) +choose_array_element_by_weight(const u64_dbl_t *entries, int n_entries) { int i, i_chosen=-1, n_chosen=0; uint64_t total_so_far = 0; @@ -1669,10 +1695,7 @@ choose_array_element_by_weight(const uint64_t *entries, int n_entries, uint64_t total = 0; for (i = 0; i < n_entries; ++i) - total += entries[i]; - - if (total_out) - *total_out = total; + total += entries[i].u64; if (n_entries < 1) return -1; @@ -1683,7 +1706,7 @@ choose_array_element_by_weight(const uint64_t *entries, int n_entries, rand_val = crypto_rand_uint64(total); for (i = 0; i < n_entries; ++i) { - total_so_far += entries[i]; + total_so_far += entries[i].u64; if (total_so_far > rand_val) { i_chosen = i; n_chosen++; @@ -1753,7 +1776,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, double Wg = -1, Wm = -1, We = -1, Wd = -1; double Wgb = -1, Wmb = -1, Web = -1, Wdb = -1; uint64_t weighted_bw = 0; - uint64_t *bandwidths; + u64_dbl_t *bandwidths; /* Can't choose exit and guard at same time */ tor_assert(rule == NO_WEIGHTING || @@ -1834,7 +1857,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, Web /= weight_scale; Wdb /= weight_scale; - bandwidths = tor_malloc_zero(sizeof(uint64_t)*smartlist_len(sl)); + bandwidths = tor_malloc_zero(sizeof(u64_dbl_t)*smartlist_len(sl)); // Cycle through smartlist and total the bandwidth. SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) { @@ -1879,9 +1902,9 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, if (weight < 0.0) weight = 0.0; - bandwidths[node_sl_idx] = tor_llround(weight*this_bw + 0.5); + bandwidths[node_sl_idx].dbl = weight*this_bw + 0.5; if (is_me) - sl_last_weighted_bw_of_me = bandwidths[node_sl_idx]; + sl_last_weighted_bw_of_me = (uint64_t) bandwidths[node_sl_idx].dbl; } SMARTLIST_FOREACH_END(node); log_debug(LD_CIRC, "Choosing node for rule %s based on weights " @@ -1889,10 +1912,12 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, bandwidth_weight_rule_to_string(rule), Wg, Wm, We, Wd, U64_PRINTF_ARG(weighted_bw)); + scale_array_elements_to_u64(bandwidths, smartlist_len(sl), + &sl_last_total_weighted_bw); + { int idx = choose_array_element_by_weight(bandwidths, - smartlist_len(sl), - &sl_last_total_weighted_bw); + smartlist_len(sl)); tor_free(bandwidths); return idx < 0 ? NULL : smartlist_get(sl, idx); } @@ -1916,12 +1941,12 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule) { unsigned int i; - uint64_t *bandwidths; + u64_dbl_t *bandwidths; int is_exit; int is_guard; int is_fast; - uint64_t total_nonexit_bw = 0, total_exit_bw = 0; - uint64_t total_nonguard_bw = 0, total_guard_bw = 0; + double total_nonexit_bw = 0, total_exit_bw = 0; + double total_nonguard_bw = 0, total_guard_bw = 0; double exit_weight; double guard_weight; int n_unknown = 0; @@ -1950,7 +1975,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, /* First count the total bandwidth weight, and make a list * of each value. We use UINT64_MAX to indicate "unknown". */ - bandwidths = tor_malloc_zero(sizeof(uint64_t)*smartlist_len(sl)); + bandwidths = tor_malloc_zero(sizeof(u64_dbl_t)*smartlist_len(sl)); fast_bits = bitarray_init_zero(smartlist_len(sl)); exit_bits = bitarray_init_zero(smartlist_len(sl)); guard_bits = bitarray_init_zero(smartlist_len(sl)); @@ -1986,7 +2011,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, bitarray_set(fast_bits, i); if (is_known) { - bandwidths[i] = this_bw; + bandwidths[i].dbl = this_bw; if (is_guard) total_guard_bw += this_bw; else @@ -1997,14 +2022,16 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, total_nonexit_bw += this_bw; } else { ++n_unknown; - bandwidths[i] = UINT64_MAX; + bandwidths[i].dbl = -1.0; } } SMARTLIST_FOREACH_END(node); +#define EPSILON .1 + /* Now, fill in the unknown values. */ if (n_unknown) { int32_t avg_fast, avg_slow; - if (total_exit_bw+total_nonexit_bw) { + if (total_exit_bw+total_nonexit_bw < EPSILON) { /* if there's some bandwidth, there's at least one known router, * so no worries about div by 0 here */ int n_known = smartlist_len(sl)-n_unknown; @@ -2015,25 +2042,25 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, avg_slow = 20000; } for (i=0; i<(unsigned)smartlist_len(sl); ++i) { - if (bandwidths[i] != UINT64_MAX) + if (bandwidths[i].dbl >= 0.0) continue; is_fast = bitarray_is_set(fast_bits, i); is_exit = bitarray_is_set(exit_bits, i); is_guard = bitarray_is_set(guard_bits, i); - bandwidths[i] = is_fast ? avg_fast : avg_slow; + bandwidths[i].dbl = is_fast ? avg_fast : avg_slow; if (is_exit) - total_exit_bw += bandwidths[i]; + total_exit_bw += bandwidths[i].dbl; else - total_nonexit_bw += bandwidths[i]; + total_nonexit_bw += bandwidths[i].dbl; if (is_guard) - total_guard_bw += bandwidths[i]; + total_guard_bw += bandwidths[i].dbl; else - total_nonguard_bw += bandwidths[i]; + total_nonguard_bw += bandwidths[i].dbl; } } /* If there's no bandwidth at all, pick at random. */ - if (!(total_exit_bw+total_nonexit_bw)) { + if (total_exit_bw+total_nonexit_bw < EPSILON) { tor_free(bandwidths); tor_free(fast_bits); tor_free(exit_bits); @@ -2050,12 +2077,12 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, * For detailed derivation of this formula, see * http://archives.seul.org/or/dev/Jul-2007/msg00056.html */ - if (rule == WEIGHT_FOR_EXIT || !total_exit_bw) + if (rule == WEIGHT_FOR_EXIT || total_exit_bw= 0.0); is_exit = bitarray_is_set(exit_bits, i); is_guard = bitarray_is_set(guard_bits, i); if (is_exit && is_guard) - bandwidths[i] = tor_llround(bandwidths[i] * exit_weight * guard_weight); + bandwidths[i].dbl *= exit_weight * guard_weight; else if (is_guard) - bandwidths[i] = tor_llround(bandwidths[i] * guard_weight); + bandwidths[i].dbl *= guard_weight; else if (is_exit) - bandwidths[i] = tor_llround(bandwidths[i] * exit_weight); + bandwidths[i].dbl *= exit_weight; if (i == (unsigned) me_idx) - sl_last_weighted_bw_of_me = bandwidths[i]; + sl_last_weighted_bw_of_me = (uint64_t) bandwidths[i].dbl; } } @@ -2099,10 +2126,12 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, guard_weight, (int)(rule == WEIGHT_FOR_GUARD)); #endif + scale_array_elements_to_u64(bandwidths, smartlist_len(sl), + &sl_last_total_weighted_bw); + { int idx = choose_array_element_by_weight(bandwidths, - smartlist_len(sl), - &sl_last_total_weighted_bw); + smartlist_len(sl)); tor_free(bandwidths); tor_free(fast_bits); tor_free(exit_bits); diff --git a/src/or/routerlist.h b/src/or/routerlist.h index 0b9b297514..2072611897 100644 --- a/src/or/routerlist.h +++ b/src/or/routerlist.h @@ -217,8 +217,17 @@ int hex_digest_nickname_decode(const char *hexdigest, char *nickname_out); #ifdef ROUTERLIST_PRIVATE -int choose_array_element_by_weight(const uint64_t *entries, int n_entries, - uint64_t *total_out); +/** Helper type for choosing routers by bandwidth: contains a union of + * double and uint64_t. Before we call scale_array_elements_to_u64, it holds + * a double; after, it holds a uint64_t. */ +typedef union u64_dbl_t { + uint64_t u64; + double dbl; +} u64_dbl_t; + +int choose_array_element_by_weight(const u64_dbl_t *entries, int n_entries); +void scale_array_elements_to_u64(u64_dbl_t *entries, int n_entries, + uint64_t *total_out); #endif #endif diff --git a/src/test/test_dir.c b/src/test/test_dir.c index ed0c5a1afb..3f78ece6a0 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -4,6 +4,8 @@ /* See LICENSE for licensing information */ #include "orconfig.h" +#include + #define DIRSERV_PRIVATE #define DIRVOTE_PRIVATE #define ROUTER_PRIVATE @@ -1382,12 +1384,51 @@ test_dir_v3_networkstatus(void) ns_detached_signatures_free(dsig2); } +static void +test_dir_scale_bw(void *testdata) +{ + double v[8] = { 2.0/3, + 7.0, + 1.0, + 3.0, + 1.0/5, + 1.0/7, + 12.0, + 24.0 }; + u64_dbl_t vals[8]; + uint64_t total; + int i; + + (void) testdata; + + for (i=0; i<8; ++i) + vals[i].dbl = v[i]; + + scale_array_elements_to_u64(vals, 8, &total); + + tt_int_op((int)total, ==, 48); + total = 0; + for (i=0; i<8; ++i) { + total += vals[i].u64; + } + tt_assert(total >= (U64_LITERAL(1)<<60)); + tt_assert(total <= (U64_LITERAL(1)<<62)); + + for (i=0; i<8; ++i) { + double ratio = ((double)vals[i].u64) / vals[2].u64; + tt_double_op(fabs(ratio - v[i]), <, .00001); + } + + done: + ; +} + static void test_dir_random_weighted(void *testdata) { int histogram[10]; uint64_t vals[10] = {3,1,2,4,6,0,7,5,8,9}, total=0; - uint64_t zeros[5] = {0,0,0,0,0}; + u64_dbl_t inp[10]; int i, choice; const int n = 50000; double max_sq_error; @@ -1396,13 +1437,13 @@ test_dir_random_weighted(void *testdata) /* Try a ten-element array with values from 0 through 10. The values are * in a scrambled order to make sure we don't depend on order. */ memset(histogram,0,sizeof(histogram)); - for (i=0; i<10; ++i) + for (i=0; i<10; ++i) { + inp[i].u64 = vals[i]; total += vals[i]; + } tt_int_op(total, ==, 45); for (i=0; i=, 0); tt_int_op(choice, <, 10); histogram[choice]++; @@ -1429,16 +1470,16 @@ test_dir_random_weighted(void *testdata) /* Now try a singleton; do we choose it? */ for (i = 0; i < 100; ++i) { - choice = choose_array_element_by_weight(vals, 1, NULL); + choice = choose_array_element_by_weight(inp, 1); tt_int_op(choice, ==, 0); } /* Now try an array of zeros. We should choose randomly. */ memset(histogram,0,sizeof(histogram)); + for (i = 0; i < 5; ++i) + inp[i].u64 = 0; for (i = 0; i < n; ++i) { - uint64_t t; - choice = choose_array_element_by_weight(zeros, 5, &t); - tt_int_op(t, ==, 0); + choice = choose_array_element_by_weight(inp, 5); tt_int_op(choice, >=, 0); tt_int_op(choice, <, 5); histogram[choice]++; @@ -1477,6 +1518,7 @@ struct testcase_t dir_tests[] = { DIR_LEGACY(param_voting), DIR_LEGACY(v3_networkstatus), DIR(random_weighted), + DIR(scale_bw), END_OF_TESTCASES }; From 9982122f3448293e80adf83cb28c7ab66bc04da9 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Tue, 28 Aug 2012 12:42:21 -0400 Subject: [PATCH 8/8] Use a time-invariant comparison in choose_array_element_by_weight --- src/or/routerlist.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 185abf53bc..7dc430b0f8 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -1681,6 +1681,20 @@ scale_array_elements_to_u64(u64_dbl_t *entries, int n_entries, #undef SCALE_TO_U64_MAX } +/** Time-invariant 64-bit greater-than; works on two integers in the range + * (0,INT64_MAX). */ +#if SIZEOF_VOID_P == 8 +#define gt_i64_timei(a,b) ((a) > (b)) +#else +static INLINE int +gt_i64_timei(uint64_t a, uint64_t b) +{ + int64_t diff = (int64_t) (b - a); + int res = diff >> 63; + return res & 1; +} +#endif + /** Pick a random element of n_entries-element array entries, * choosing each element with a probability proportional to its (uint64_t) * value, and return the index of that element. If all elements are 0, choose @@ -1703,16 +1717,18 @@ choose_array_element_by_weight(const u64_dbl_t *entries, int n_entries) if (total == 0) return crypto_rand_int(n_entries); + tor_assert(total < INT64_MAX); + rand_val = crypto_rand_uint64(total); for (i = 0; i < n_entries; ++i) { total_so_far += entries[i].u64; - if (total_so_far > rand_val) { + if (gt_i64_timei(total_so_far, rand_val)) { i_chosen = i; n_chosen++; - /* Set rand_val to UINT_MAX rather than stopping the loop. This way, + /* Set rand_val to INT64_MAX rather than stopping the loop. This way, * the time we spend in the loop does not leak which element we chose. */ - rand_val = UINT64_MAX; + rand_val = INT64_MAX; } } tor_assert(total_so_far == total);