From cb29687e93169e9615de3bc5adcbf6d31551b32c Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Tue, 17 Oct 2017 13:24:40 -0400 Subject: [PATCH] Replace our random-exponential-delay algorithm. This patch has implementations of the "decorrelated" and "full" algorithms from https://www.awsarchitectureblog.com/2015/03/backoff.html --- changes/bug23816 | 6 ++++ src/or/directory.c | 77 +++++++++++++++++++++++++++------------------ src/or/directory.h | 8 ++++- src/test/test_dir.c | 42 ++++++++++++++++++++++--- 4 files changed, 97 insertions(+), 36 deletions(-) create mode 100644 changes/bug23816 diff --git a/changes/bug23816 b/changes/bug23816 new file mode 100644 index 0000000000..6139dec9e8 --- /dev/null +++ b/changes/bug23816 @@ -0,0 +1,6 @@ + o Minor bugfixes (directory client): + - On failure to download directory information, delay retry attempts + by a random amount based on the "decorrelated jitter" algorithm. + Our previous delay algorithm tended to produce extra-long delays too + easily. Fixes bug 23816; bugfix on 0.2.9.1-alpha. + diff --git a/src/or/directory.c b/src/or/directory.c index fce48c6e95..758a29b050 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -3776,53 +3776,68 @@ find_dl_min_and_max_delay(download_status_t *dls, const or_options_t *options, *max = INT_MAX; } -/** Advance one delay step. The algorithm is to use the previous delay to - * compute an increment, we construct a value uniformly at random between - * delay and MAX(delay*2,delay+1). We then clamp that value to be no larger - * than max_delay, and return it. +/** As next_random_exponential_delay() below, but does not compute a random + * value. Instead, compute the range of values that + * next_random_exponential_delay() should use when computing its random value. + * Store the low bound into *low_bound_out, and the high bound into + * *high_bound_out. Guarantees that the low bound is strictly less + * than the high bound. */ +STATIC void +next_random_exponential_delay_range(int *low_bound_out, + int *high_bound_out, + int delay, + int base_delay) +{ + // This is the "decorrelated jitter" approach, from + // https://www.awsarchitectureblog.com/2015/03/backoff.html + // The formula is + // sleep = min(cap, random_between(base, sleep * 3)) + + const int delay_times_3 = delay < INT_MAX/3 ? delay * 3 : INT_MAX; + *low_bound_out = base_delay; + if (delay_times_3 > base_delay) { + *high_bound_out = delay_times_3; + } else { + *high_bound_out = base_delay+1; + } +} + +/** Advance one delay step. The algorithm will generate a random delay, + * such that each failure is possibly (random) longer than the ones before. + * + * We then clamp that value to be no larger than max_delay, and return it. + * + * The base_delay parameter is lowest possible delay time (can't be + * zero); the backoff_position parameter is the number of times we've + * generated a delay; and the delay argument is the most recently used + * delay. * * Requires that delay is less than INT_MAX, and delay is in [0,max_delay]. */ STATIC int -next_random_exponential_delay(int delay, int max_delay) +next_random_exponential_delay(int delay, + int base_delay, + int max_delay) { /* Check preconditions */ if (BUG(max_delay < 0)) max_delay = 0; if (BUG(delay > max_delay)) delay = max_delay; - if (delay == INT_MAX) - return INT_MAX; /* prevent overflow */ if (BUG(delay < 0)) delay = 0; - /* How much are we willing to add to the delay? */ - int max_increment; - int multiplier = 3; /* no more than quadruple the previous delay */ - if (get_options()->TestingTorNetwork) { - /* Decrease the multiplier in testing networks. This reduces the variance, - * so that bootstrap is more reliable. */ - multiplier = 2; /* no more than triple the previous delay */ - } + if (base_delay < 1) + base_delay = 1; - if (delay && delay < (INT_MAX-1) / multiplier) { - max_increment = delay * multiplier; - } else if (delay) { - max_increment = INT_MAX-1; - } else { - max_increment = 1; - } + int low_bound=0, high_bound=max_delay; - if (BUG(max_increment < 1)) - max_increment = 1; + next_random_exponential_delay_range(&low_bound, &high_bound, + delay, base_delay); - /* the + 1 here is so that we always wait longer than last time. */ - int increment = crypto_rand_int(max_increment)+1; + int rand_delay = crypto_rand_int_range(low_bound, high_bound); - if (increment < max_delay - delay) - return delay + increment; - else - return max_delay; + return MIN(rand_delay, max_delay); } /** Find the current delay for dls based on schedule or min_delay/ @@ -3871,7 +3886,7 @@ download_status_schedule_get_delay(download_status_t *dls, while (dls->last_backoff_position < dls_schedule_position) { /* Do one increment step */ - delay = next_random_exponential_delay(delay, max_delay); + delay = next_random_exponential_delay(delay, min_delay, max_delay); /* Update our position */ ++(dls->last_backoff_position); } diff --git a/src/or/directory.h b/src/or/directory.h index 629b3ead90..c055e60d98 100644 --- a/src/or/directory.h +++ b/src/or/directory.h @@ -167,8 +167,14 @@ STATIC const smartlist_t *find_dl_schedule(download_status_t *dls, STATIC void find_dl_min_and_max_delay(download_status_t *dls, const or_options_t *options, int *min, int *max); -STATIC int next_random_exponential_delay(int delay, int max_delay); +STATIC int next_random_exponential_delay(int delay, + int base_delay, + int max_delay); +STATIC void next_random_exponential_delay_range(int *low_bound_out, + int *high_bound_out, + int delay, + int base_delay); #endif #endif diff --git a/src/test/test_dir.c b/src/test/test_dir.c index cdc56acb89..1c6662147f 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -3617,6 +3617,7 @@ test_dir_download_status_random_backoff(void *arg) /* Check the random backoff cases */ old_increment = 0; + int n_attempts = 0; do { increment = download_status_schedule_get_delay(&dls_random, NULL, @@ -3625,9 +3626,9 @@ test_dir_download_status_random_backoff(void *arg) /* Test */ tt_int_op(increment, OP_GE, min_delay); tt_int_op(increment, OP_LE, max_delay); - tt_int_op(increment, OP_GE, old_increment); - /* We at most quadruple, and maybe add one */ - tt_int_op(increment, OP_LE, 4 * old_increment + 1); + + if (old_increment) + tt_int_op(increment, OP_LE, old_increment * 3); /* Advance */ current_time += increment; @@ -3636,12 +3637,44 @@ test_dir_download_status_random_backoff(void *arg) /* Try another maybe */ old_increment = increment; - } while (increment < max_delay); + } while (increment < max_delay && ++n_attempts < 1000); done: return; } +static void +test_dir_download_status_random_backoff_ranges(void *arg) +{ + (void)arg; + int lo, hi; + next_random_exponential_delay_range(&lo, &hi, 0, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 11); + + next_random_exponential_delay_range(&lo, &hi, 6, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 6*3); + + next_random_exponential_delay_range(&lo, &hi, 13, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 13 * 3); + + next_random_exponential_delay_range(&lo, &hi, 37, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 111); + + next_random_exponential_delay_range(&lo, &hi, 123, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, 369); + + next_random_exponential_delay_range(&lo, &hi, INT_MAX-5, 10); + tt_int_op(lo, OP_EQ, 10); + tt_int_op(hi, OP_EQ, INT_MAX); + done: + ; +} + static void test_dir_download_status_increment(void *arg) { @@ -5475,6 +5508,7 @@ struct testcase_t dir_tests[] = { DIR(packages, 0), DIR(download_status_schedule, 0), DIR(download_status_random_backoff, 0), + DIR(download_status_random_backoff_ranges, 0), DIR(download_status_increment, 0), DIR(authdir_type_to_string, 0), DIR(conn_purpose_to_string, 0),