From 35bbf2e4a4e8ccbc4126ebffda67c48989ec2f06 Mon Sep 17 00:00:00 2001 From: "teor (Tim Wilson-Brown)" Date: Mon, 7 Dec 2015 17:55:38 +1100 Subject: [PATCH] Prop210: Add schedules for simultaneous client consensus downloads Prop210: Add attempt-based connection schedules Existing tor schedules increment the schedule position on failure, then retry the connection after the scheduled time. To make multiple simultaneous connections, we need to increment the schedule position when making each attempt, then retry a (potentially simultaneous) connection after the scheduled time. (Also change find_dl_schedule_and_len to find_dl_schedule, as it no longer takes or returns len.) Prop210: Add multiple simultaneous consensus downloads for clients Make connections on TestingClientBootstrapConsensus*DownloadSchedule, incrementing the schedule each time the client attempts to connect. Check if the number of downloads is less than TestingClientBootstrapConsensusMaxInProgressTries before trying any more connections. --- changes/bug4483-multiple-consensus-downloads | 9 + doc/tor.1.txt | 55 ++- src/common/torint.h | 26 ++ src/or/config.c | 76 +++- src/or/directory.c | 252 +++++++++-- src/or/directory.h | 13 +- src/or/main.c | 19 +- src/or/networkstatus.c | 330 ++++++++++++-- src/or/networkstatus.h | 7 + src/or/or.h | 121 +++++- src/or/routerlist.c | 15 +- src/test/test_dir.c | 431 +++++++++++++++++++ 12 files changed, 1255 insertions(+), 99 deletions(-) create mode 100644 changes/bug4483-multiple-consensus-downloads diff --git a/changes/bug4483-multiple-consensus-downloads b/changes/bug4483-multiple-consensus-downloads new file mode 100644 index 0000000000..23d22a89c4 --- /dev/null +++ b/changes/bug4483-multiple-consensus-downloads @@ -0,0 +1,9 @@ + o Major features (consensus downloads): + - Schedule multiple in-progress consensus downloads during client + bootstrap. Use the first one that starts downloading, close the + rest. This reduces failures when authorities are slow or down. + With #15775, it reduces failures due to fallback churn. + Implements #4483 (reduce failures when authorities are down). + Patch by "teor". + Implements IPv4 portions of proposal #210 by "mikeperry" and + "teor". diff --git a/doc/tor.1.txt b/doc/tor.1.txt index 041b000f09..77e4c4e8fe 100644 --- a/doc/tor.1.txt +++ b/doc/tor.1.txt @@ -2281,10 +2281,18 @@ The following options are used for running a testing Tor network. TestingClientDownloadSchedule 0, 0, 5, 10, 15, 20, 30, 60 TestingServerConsensusDownloadSchedule 0, 0, 5, 10, 15, 20, 30, 60 TestingClientConsensusDownloadSchedule 0, 0, 5, 10, 15, 20, 30, 60 + TestingClientBootstrapConsensusAuthorityDownloadSchedule 0, 2, + 4 (for 40 seconds), 8, 16, 32, 60 + TestingClientBootstrapConsensusFallbackDownloadSchedule 0, 1, + 4 (for 40 seconds), 8, 16, 32, 60 + TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule 0, 1, + 4 (for 40 seconds), 8, 16, 32, 60 TestingBridgeDownloadSchedule 60, 30, 30, 60 TestingClientMaxIntervalWithoutRequest 5 seconds TestingDirConnectionMaxStall 30 seconds TestingConsensusMaxDownloadTries 80 + TestingClientBootstrapConsensusMaxDownloadTries 80 + TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries 80 TestingDescriptorMaxDownloadTries 80 TestingMicrodescMaxDownloadTries 80 TestingCertMaxDownloadTries 80 @@ -2345,6 +2353,36 @@ The following options are used for running a testing Tor network. requires that **TestingTorNetwork** is set. (Default: 0, 0, 60, 300, 600, 1800, 3600, 3600, 3600, 10800, 21600, 43200) +[[TestingClientBootstrapConsensusAuthorityDownloadSchedule]] **TestingClientBootstrapConsensusAuthorityDownloadSchedule** __N__,__N__,__...__:: + Schedule for when clients should download consensuses from authorities if + they are bootstrapping (that is, they don't have a usable, reasonably live + consensus). Only used by clients fetching from a list of fallback + directory mirrors. This schedule is advanced by (potentially concurrent) + connection attempts, unlike other schedules, which are advanced by + connection failures. Changing this schedule requires that + **TestingTorNetwork** is set. (Default: 10, 11, 3600, 10800, 25200, 54000, + 111600, 262800) + +[[TestingClientBootstrapConsensusFallbackDownloadSchedule]] **TestingClientBootstrapConsensusFallbackDownloadSchedule** __N__,__N__,__...__:: + Schedule for when clients should download consensuses from fallback + directory mirrors if they are bootstrapping (that is, they don't have a + usable, reasonably live consensus). Only used by clients fetching from a + list of fallback directory mirrors. This schedule is advanced by + (potentially concurrent) connection attempts, unlike other schedules, which + are advanced by connection failures. Changing this schedule requires that + **TestingTorNetwork** is set. (Default: 0, 1, 4, 11, 3600, 10800, 25200, + 54000, 111600, 262800) + +[[TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule]] **TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule** __N__,__N__,__...__:: + Schedule for when clients should download consensuses from authorities if + they are bootstrapping (that is, they don't have a usable, reasonably live + consensus). Only used by clients which don't have or won't fetch from a + list of fallback directory mirrors. This schedule is advanced by + (potentially concurrent) connection attempts, unlike other schedules, + which are advanced by connection failures. Changing this schedule requires + that **TestingTorNetwork** is set. (Default: 0, 3, 7, 3600, 10800, 25200, + 54000, 111600, 262800) + [[TestingBridgeDownloadSchedule]] **TestingBridgeDownloadSchedule** __N__,__N__,__...__:: Schedule for when clients should download bridge descriptors. Changing this requires that **TestingTorNetwork** is set. (Default: 3600, 900, 900, 3600) @@ -2361,9 +2399,24 @@ The following options are used for running a testing Tor network. 5 minutes) [[TestingConsensusMaxDownloadTries]] **TestingConsensusMaxDownloadTries** __NUM__:: - Try this often to download a consensus before giving up. Changing + Try this many times to download a consensus before giving up. Changing this requires that **TestingTorNetwork** is set. (Default: 8) +[[TestingClientBootstrapConsensusMaxDownloadTries]] **TestingClientBootstrapConsensusMaxDownloadTries** __NUM__:: + Try this many times to download a consensus while bootstrapping using + fallback directory mirrors before giving up. Changing this requires that + **TestingTorNetwork** is set. (Default: 7) + +[[TestingClientBootstrapConsensusMaxDownloadTries]] **TestingClientBootstrapConsensusMaxDownloadTries** __NUM__:: + Try this many times to download a consensus while bootstrapping using + only authorities before giving up. Changing this requires that + **TestingTorNetwork** is set. (Default: 4) + +[[TestingClientBootstrapConsensusMaxInProgressTries]] **TestingClientBootstrapConsensusMaxInProgressTries** __NUM__:: + Try this many simultaneous connections to download a consensus before + waiting for one to complete, timeout, or error out. Changing this + requires that **TestingTorNetwork** is set. (Default: 4) + [[TestingDescriptorMaxDownloadTries]] **TestingDescriptorMaxDownloadTries** __NUM__:: Try this often to download a server descriptor before giving up. Changing this requires that **TestingTorNetwork** is set. (Default: 8) diff --git a/src/common/torint.h b/src/common/torint.h index 6171700898..418fe0fabf 100644 --- a/src/common/torint.h +++ b/src/common/torint.h @@ -336,6 +336,32 @@ typedef uint32_t uintptr_t; #endif /* time_t_is_signed */ #endif /* ifndef(TIME_MAX) */ +#ifndef TIME_MIN + +#ifdef TIME_T_IS_SIGNED + +#if (SIZEOF_TIME_T == SIZEOF_INT) +#define TIME_MIN ((time_t)INT_MIN) +#elif (SIZEOF_TIME_T == SIZEOF_LONG) +#define TIME_MIN ((time_t)LONG_MIN) +#elif (SIZEOF_TIME_T == 8) +#define TIME_MIN ((time_t)INT64_MIN) +#else +#error "Can't define (signed) TIME_MIN" +#endif + +#else +/* Unsigned case */ +#if (SIZEOF_TIME_T == 4) +#define TIME_MIN ((time_t)UINT32_MIN) +#elif (SIZEOF_TIME_T == 8) +#define TIME_MIN ((time_t)UINT64_MIN) +#else +#error "Can't define (unsigned) TIME_MIN" +#endif +#endif /* time_t_is_signed */ +#endif /* ifndef(TIME_MIN) */ + #ifndef SIZE_MAX #if (SIZEOF_SIZE_T == 4) #define SIZE_MAX UINT32_MAX diff --git a/src/or/config.c b/src/or/config.c index 7b42c9fdb3..413667a235 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -475,10 +475,40 @@ static config_var_t option_vars_[] = { V(TestingClientConsensusDownloadSchedule, CSV_INTERVAL, "0, 0, 60, " "300, 600, 1800, 3600, 3600, 3600, " "10800, 21600, 43200"), + /* With the TestingClientBootstrapConsensus*Download* below: + * Clients with only authorities will try: + * - 3 authorities over 10 seconds, then wait 60 minutes. + * Clients with authorities and fallbacks will try: + * - 2 authorities and 4 fallbacks over 21 seconds, then wait 60 minutes. + * Clients will also retry when an application request arrives. + * After a number of failed reqests, clients retry every 3 days + 1 hour. + * + * Clients used to try 2 authorities over 10 seconds, then wait for + * 60 minutes or an application request. + * + * When clients have authorities and fallbacks available, they use these + * schedules: (we stagger the times to avoid thundering herds) */ + V(TestingClientBootstrapConsensusAuthorityDownloadSchedule, CSV_INTERVAL, + "10, 11, 3600, 10800, 25200, 54000, 111600, 262800" /* 3 days + 1 hour */), + V(TestingClientBootstrapConsensusFallbackDownloadSchedule, CSV_INTERVAL, + "0, 1, 4, 11, 3600, 10800, 25200, 54000, 111600, 262800"), + /* When clients only have authorities available, they use this schedule: */ + V(TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule, CSV_INTERVAL, + "0, 3, 7, 3600, 10800, 25200, 54000, 111600, 262800"), + /* We don't want to overwhelm slow networks (or mirrors whose replies are + * blocked), but we also don't want to fail if only some mirrors are + * blackholed. Clients will try 3 directories simultaneously. + * (Relays never use simultaneous connections.) */ + V(TestingClientBootstrapConsensusMaxInProgressTries, UINT, "3"), V(TestingBridgeDownloadSchedule, CSV_INTERVAL, "3600, 900, 900, 3600"), V(TestingClientMaxIntervalWithoutRequest, INTERVAL, "10 minutes"), V(TestingDirConnectionMaxStall, INTERVAL, "5 minutes"), V(TestingConsensusMaxDownloadTries, UINT, "8"), + /* Since we try connections rapidly and simultaneously, we can afford + * to give up earlier. (This protects against overloading directories.) */ + V(TestingClientBootstrapConsensusMaxDownloadTries, UINT, "7"), + /* We want to give up much earlier if we're only using authorities. */ + V(TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries, UINT, "4"), V(TestingDescriptorMaxDownloadTries, UINT, "8"), V(TestingMicrodescMaxDownloadTries, UINT, "8"), V(TestingCertMaxDownloadTries, UINT, "8"), @@ -525,10 +555,18 @@ static const config_var_t testing_tor_network_defaults[] = { "15, 20, 30, 60"), V(TestingClientConsensusDownloadSchedule, CSV_INTERVAL, "0, 0, 5, 10, " "15, 20, 30, 60"), + V(TestingClientBootstrapConsensusAuthorityDownloadSchedule, CSV_INTERVAL, + "0, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 16, 32, 60"), + V(TestingClientBootstrapConsensusFallbackDownloadSchedule, CSV_INTERVAL, + "0, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 16, 32, 60"), + V(TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule, CSV_INTERVAL, + "0, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 16, 32, 60"), V(TestingBridgeDownloadSchedule, CSV_INTERVAL, "60, 30, 30, 60"), V(TestingClientMaxIntervalWithoutRequest, INTERVAL, "5 seconds"), V(TestingDirConnectionMaxStall, INTERVAL, "30 seconds"), V(TestingConsensusMaxDownloadTries, UINT, "80"), + V(TestingClientBootstrapConsensusMaxDownloadTries, UINT, "80"), + V(TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries, UINT, "80"), V(TestingDescriptorMaxDownloadTries, UINT, "80"), V(TestingMicrodescMaxDownloadTries, UINT, "80"), V(TestingCertMaxDownloadTries, UINT, "80"), @@ -3749,10 +3787,16 @@ options_validate(or_options_t *old_options, or_options_t *options, CHECK_DEFAULT(TestingClientDownloadSchedule); CHECK_DEFAULT(TestingServerConsensusDownloadSchedule); CHECK_DEFAULT(TestingClientConsensusDownloadSchedule); + CHECK_DEFAULT(TestingClientBootstrapConsensusAuthorityDownloadSchedule); + CHECK_DEFAULT(TestingClientBootstrapConsensusFallbackDownloadSchedule); + CHECK_DEFAULT(TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule); CHECK_DEFAULT(TestingBridgeDownloadSchedule); CHECK_DEFAULT(TestingClientMaxIntervalWithoutRequest); CHECK_DEFAULT(TestingDirConnectionMaxStall); CHECK_DEFAULT(TestingConsensusMaxDownloadTries); + CHECK_DEFAULT(TestingClientBootstrapConsensusMaxDownloadTries); + CHECK_DEFAULT(TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries); + CHECK_DEFAULT(TestingClientBootstrapConsensusMaxInProgressTries); CHECK_DEFAULT(TestingDescriptorMaxDownloadTries); CHECK_DEFAULT(TestingMicrodescMaxDownloadTries); CHECK_DEFAULT(TestingCertMaxDownloadTries); @@ -3827,11 +3871,41 @@ options_validate(or_options_t *old_options, or_options_t *options, } if (options->TestingConsensusMaxDownloadTries < 2) { - REJECT("TestingConsensusMaxDownloadTries must be greater than 1."); + REJECT("TestingConsensusMaxDownloadTries must be greater than 2."); } else if (options->TestingConsensusMaxDownloadTries > 800) { COMPLAIN("TestingConsensusMaxDownloadTries is insanely high."); } + if (options->TestingClientBootstrapConsensusMaxDownloadTries < 2) { + REJECT("TestingClientBootstrapConsensusMaxDownloadTries must be greater " + "than 2." + ); + } else if (options->TestingClientBootstrapConsensusMaxDownloadTries > 800) { + COMPLAIN("TestingClientBootstrapConsensusMaxDownloadTries is insanely " + "high."); + } + + if (options->TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries + < 2) { + REJECT("TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries must " + "be greater than 2." + ); + } else if ( + options->TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries + > 800) { + COMPLAIN("TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries is " + "insanely high."); + } + + if (options->TestingClientBootstrapConsensusMaxInProgressTries < 1) { + REJECT("TestingClientBootstrapConsensusMaxInProgressTries must be greater " + "than 0."); + } else if (options->TestingClientBootstrapConsensusMaxInProgressTries + > 100) { + COMPLAIN("TestingClientBootstrapConsensusMaxInProgressTries is insanely " + "high."); + } + if (options->TestingDescriptorMaxDownloadTries < 2) { REJECT("TestingDescriptorMaxDownloadTries must be greater than 1."); } else if (options->TestingDescriptorMaxDownloadTries > 800) { diff --git a/src/or/directory.c b/src/or/directory.c index 555462b325..0d2a8b2459 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -3443,26 +3443,54 @@ connection_dir_finished_connecting(dir_connection_t *conn) } /** Decide which download schedule we want to use based on descriptor type - * in dls and whether we are acting as directory server, and - * then return a list of int pointers defining download delays in seconds. - * Helper function for download_status_increment_failure() and - * download_status_reset(). */ + * in dls and options. + * Then return a list of int pointers defining download delays in seconds. + * Helper function for download_status_increment_failure(), + * download_status_reset(), and download_status_increment_attempt(). */ static const smartlist_t * -find_dl_schedule_and_len(download_status_t *dls, int server) +find_dl_schedule(download_status_t *dls, const or_options_t *options) { + /* XX/teor Replace with dir_server_mode from #12538 */ + const int dir_server = options->DirPort_set; + const int multi_d = networkstatus_consensus_can_use_multiple_directories( + options); + const int we_are_bootstrapping = networkstatus_consensus_is_boostrapping( + time(NULL)); + const int use_fallbacks = networkstatus_consensus_can_use_extra_fallbacks( + options); switch (dls->schedule) { case DL_SCHED_GENERIC: - if (server) - return get_options()->TestingServerDownloadSchedule; - else - return get_options()->TestingClientDownloadSchedule; + if (dir_server) { + return options->TestingServerDownloadSchedule; + } else { + return options->TestingClientDownloadSchedule; + } case DL_SCHED_CONSENSUS: - if (server) - return get_options()->TestingServerConsensusDownloadSchedule; - else - return get_options()->TestingClientConsensusDownloadSchedule; + if (!multi_d) { + return options->TestingServerConsensusDownloadSchedule; + } else { + if (we_are_bootstrapping) { + if (!use_fallbacks) { + /* A bootstrapping client without extra fallback directories */ + return + options->TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule; + } else if (dls->want_authority) { + /* A bootstrapping client with extra fallback directories, but + * connecting to an authority */ + return + options->TestingClientBootstrapConsensusAuthorityDownloadSchedule; + } else { + /* A bootstrapping client connecting to extra fallback directories + */ + return + options->TestingClientBootstrapConsensusFallbackDownloadSchedule; + } + } else { + return options->TestingClientConsensusDownloadSchedule; + } + } case DL_SCHED_BRIDGE: - return get_options()->TestingBridgeDownloadSchedule; + return options->TestingBridgeDownloadSchedule; default: tor_assert(0); } @@ -3471,54 +3499,168 @@ find_dl_schedule_and_len(download_status_t *dls, int server) return NULL; } -/** Called when an attempt to download dls has failed with HTTP status +/* Find the current delay for dls based on schedule. + * Set dls->next_attempt_at based on now, and return the delay. + * Helper for download_status_increment_failure and + * download_status_increment_attempt. */ +STATIC int +download_status_schedule_get_delay(download_status_t *dls, + const smartlist_t *schedule, + time_t now) +{ + tor_assert(dls); + tor_assert(schedule); + + int delay = INT_MAX; + uint8_t dls_schedule_position = (dls->increment_on + == DL_SCHED_INCREMENT_ATTEMPT + ? dls->n_download_attempts + : dls->n_download_failures); + + if (dls_schedule_position < smartlist_len(schedule)) + delay = *(int *)smartlist_get(schedule, dls_schedule_position); + else if (dls_schedule_position == IMPOSSIBLE_TO_DOWNLOAD) + delay = INT_MAX; + else + delay = *(int *)smartlist_get(schedule, smartlist_len(schedule) - 1); + + /* A negative delay makes no sense. Knowing that delay is + * non-negative allows us to safely do the wrapping check below. */ + tor_assert(delay >= 0); + + /* Avoid now+delay overflowing INT_MAX, by comparing with a subtraction + * that won't overflow (since delay is non-negative). */ + if (delay < INT_MAX && now <= INT_MAX - delay) { + dls->next_attempt_at = now+delay; + } else { + dls->next_attempt_at = TIME_MAX; + } + + return delay; +} + +/* Log a debug message about item, which increments on increment_action, has + * incremented dls_n_download_increments times. The message varies based on + * was_schedule_incremented (if not, not_incremented_response is logged), and + * the values of increment, dls_next_attempt_at, and now. + * Helper for download_status_increment_failure and + * download_status_increment_attempt. */ +static void +download_status_log_helper(const char *item, int was_schedule_incremented, + const char *increment_action, + const char *not_incremented_response, + uint8_t dls_n_download_increments, int increment, + time_t dls_next_attempt_at, time_t now) +{ + if (item) { + if (!was_schedule_incremented) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again %s.", + item, increment_action, (int)dls_n_download_increments, + not_incremented_response); + else if (increment == 0) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again immediately.", + item, increment_action, (int)dls_n_download_increments); + else if (dls_next_attempt_at < TIME_MAX) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again in %d seconds.", + item, increment_action, (int)dls_n_download_increments, + (int)(dls_next_attempt_at-now)); + else + log_debug(LD_DIR, "%s %s %d time(s); Giving up for a while.", + item, increment_action, (int)dls_n_download_increments); + } +} + +/** Determine when a failed download attempt should be retried. + * Called when an attempt to download dls has failed with HTTP status * status_code. Increment the failure count (if the code indicates a - * real failure) and set dls-\>next_attempt_at to an appropriate time - * in the future. */ + * real failure, or if we're a server) and set dls-\>next_attempt_at to + * an appropriate time in the future and return it. + * If dls->increment_on is DL_SCHED_INCREMENT_ATTEMPT, increment the + * failure count, and return a time in the far future for the next attempt (to + * avoid an immediate retry). */ time_t download_status_increment_failure(download_status_t *dls, int status_code, const char *item, int server, time_t now) { - const smartlist_t *schedule; - int increment; + int increment = -1; tor_assert(dls); + + /* only count the failure if it's permanent, or we're a server */ if (status_code != 503 || server) { if (dls->n_download_failures < IMPOSSIBLE_TO_DOWNLOAD-1) ++dls->n_download_failures; } - schedule = find_dl_schedule_and_len(dls, server); + if (dls->increment_on == DL_SCHED_INCREMENT_FAILURE) { + /* We don't find out that a failure-based schedule has attempted a + * connection until that connection fails. + * We'll never find out about successful connections, but this doesn't + * matter, because schedules are reset after a successful download. + */ + if (dls->n_download_attempts < IMPOSSIBLE_TO_DOWNLOAD-1) + ++dls->n_download_attempts; - if (dls->n_download_failures < smartlist_len(schedule)) - increment = *(int *)smartlist_get(schedule, dls->n_download_failures); - else if (dls->n_download_failures == IMPOSSIBLE_TO_DOWNLOAD) - increment = INT_MAX; - else - increment = *(int *)smartlist_get(schedule, smartlist_len(schedule) - 1); - - if (increment < INT_MAX) - dls->next_attempt_at = now+increment; - else - dls->next_attempt_at = TIME_MAX; - - if (item) { - if (increment == 0) - log_debug(LD_DIR, "%s failed %d time(s); I'll try again immediately.", - item, (int)dls->n_download_failures); - else if (dls->next_attempt_at < TIME_MAX) - log_debug(LD_DIR, "%s failed %d time(s); I'll try again in %d seconds.", - item, (int)dls->n_download_failures, - (int)(dls->next_attempt_at-now)); - else - log_debug(LD_DIR, "%s failed %d time(s); Giving up for a while.", - item, (int)dls->n_download_failures); + /* only return a failure retry time if this schedule increments on failures + */ + const smartlist_t *schedule = find_dl_schedule(dls, get_options()); + increment = download_status_schedule_get_delay(dls, schedule, now); } + + download_status_log_helper(item, !dls->increment_on, "failed", + "concurrently", dls->n_download_failures, + increment, dls->next_attempt_at, now); + + if (dls->increment_on == DL_SCHED_INCREMENT_ATTEMPT) { + /* stop this schedule retrying on failure, it will launch concurrent + * connections instead */ + return TIME_MAX; + } else { + return dls->next_attempt_at; + } +} + +/** Determine when the next download attempt should be made when using an + * attempt-based (potentially concurrent) download schedule. + * Called when an attempt to download dls is being initiated. + * Increment the attempt count and set dls-\>next_attempt_at to an + * appropriate time in the future and return it. + * If dls->increment_on is DL_SCHED_INCREMENT_FAILURE, don't increment + * the attempts, and return a time in the far future (to avoid launching a + * concurrent attempt). */ +time_t +download_status_increment_attempt(download_status_t *dls, const char *item, + time_t now) +{ + int delay = -1; + tor_assert(dls); + + if (dls->increment_on == DL_SCHED_INCREMENT_FAILURE) { + /* this schedule should retry on failure, and not launch any concurrent + attempts */ + log_info(LD_BUG, "Tried to launch an attempt-based connection on a " + "failure-based schedule."); + return TIME_MAX; + } + + if (dls->n_download_attempts < IMPOSSIBLE_TO_DOWNLOAD-1) + ++dls->n_download_attempts; + + const smartlist_t *schedule = find_dl_schedule(dls, get_options()); + delay = download_status_schedule_get_delay(dls, schedule, now); + + download_status_log_helper(item, dls->increment_on, "attempted", + "on failure", dls->n_download_attempts, + delay, dls->next_attempt_at, now); + return dls->next_attempt_at; } /** Reset dls so that it will be considered downloadable * immediately, and/or to show that we don't need it anymore. * + * Must be called to initialise a download schedule, otherwise the zeroth item + * in the schedule will never be used. + * * (We find the zeroth element of the download schedule, and set * next_attempt_at to be the appropriate offset from 'now'. In most * cases this means setting it to 'now', so the item will be immediately @@ -3527,14 +3669,16 @@ download_status_increment_failure(download_status_t *dls, int status_code, void download_status_reset(download_status_t *dls) { - if (dls->n_download_failures == IMPOSSIBLE_TO_DOWNLOAD) + if (dls->n_download_failures == IMPOSSIBLE_TO_DOWNLOAD + || dls->n_download_attempts == IMPOSSIBLE_TO_DOWNLOAD) return; /* Don't reset this. */ - const smartlist_t *schedule = find_dl_schedule_and_len( - dls, get_options()->DirPort_set); + const smartlist_t *schedule = find_dl_schedule(dls, get_options()); dls->n_download_failures = 0; + dls->n_download_attempts = 0; dls->next_attempt_at = time(NULL) + *(int *)smartlist_get(schedule, 0); + /* Don't reset dls->want_authority or dls->increment_on */ } /** Return the number of failures on dls since the last success (if @@ -3545,6 +3689,22 @@ download_status_get_n_failures(const download_status_t *dls) return dls->n_download_failures; } +/** Return the number of attempts to download dls since the last success + * (if any). This can differ from download_status_get_n_failures() due to + * outstanding concurrent attempts. */ +int +download_status_get_n_attempts(const download_status_t *dls) +{ + return dls->n_download_attempts; +} + +/** Return the next time to attempt to download dls. */ +time_t +download_status_get_next_attempt_at(const download_status_t *dls) +{ + return dls->next_attempt_at; +} + /** Called when one or more routerdesc (or extrainfo, if was_extrainfo) * fetches have failed (with uppercase fingerprints listed in failed, * either as descriptor digests or as identity digests based on diff --git a/src/or/directory.h b/src/or/directory.h index bdcc1a23c6..4255868d3a 100644 --- a/src/or/directory.h +++ b/src/or/directory.h @@ -92,6 +92,8 @@ int router_supports_extrainfo(const char *identity_digest, int is_authority); time_t download_status_increment_failure(download_status_t *dls, int status_code, const char *item, int server, time_t now); +time_t download_status_increment_attempt(download_status_t *dls, + const char *item, time_t now); /** Increment the failure count of the download_status_t dls, with * the optional status code sc. */ #define download_status_failed(dls, sc) \ @@ -107,8 +109,9 @@ static INLINE int download_status_is_ready(download_status_t *dls, time_t now, int max_failures) { - return (dls->n_download_failures <= max_failures - && dls->next_attempt_at <= now); + int under_failure_limit = (dls->n_download_failures <= max_failures + && dls->n_download_attempts <= max_failures); + return (under_failure_limit && dls->next_attempt_at <= now); } static void download_status_mark_impossible(download_status_t *dl); @@ -117,9 +120,12 @@ static INLINE void download_status_mark_impossible(download_status_t *dl) { dl->n_download_failures = IMPOSSIBLE_TO_DOWNLOAD; + dl->n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD; } int download_status_get_n_failures(const download_status_t *dls); +int download_status_get_n_attempts(const download_status_t *dls); +time_t download_status_get_next_attempt_at(const download_status_t *dls); #ifdef TOR_UNIT_TESTS /* Used only by directory.c and test_dir.c */ @@ -133,6 +139,9 @@ STATIC int directory_handle_command_get(dir_connection_t *conn, const char *headers, const char *req_body, size_t req_body_len); +STATIC int download_status_schedule_get_delay(download_status_t *dls, + const smartlist_t *schedule, + time_t now); #endif #endif diff --git a/src/or/main.c b/src/or/main.c index 527e2b1ffe..60957bd6ab 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -1876,18 +1876,29 @@ check_for_reachability_bw_callback(time_t now, const or_options_t *options) static int fetch_networkstatus_callback(time_t now, const or_options_t *options) { - /* 2c. Every minute (or every second if TestingTorNetwork), check - * whether we want to download any networkstatus documents. */ + /* 2c. Every minute (or every second if TestingTorNetwork, or during + * client bootstrap), check whether we want to download any networkstatus + * documents. */ /* How often do we check whether we should download network status * documents? */ -#define networkstatus_dl_check_interval(o) ((o)->TestingTorNetwork ? 1 : 60) + const int we_are_bootstrapping = networkstatus_consensus_is_boostrapping( + now); + const int prefer_mirrors = !directory_fetches_from_authorities( + get_options()); + int networkstatus_dl_check_interval = 60; + /* check more often when testing, or when bootstrapping from mirrors + * (connection limits prevent too many connections being made) */ + if (options->TestingTorNetwork + || (we_are_bootstrapping && prefer_mirrors)) { + networkstatus_dl_check_interval = 1; + } if (should_delay_dir_fetches(options, NULL)) return PERIODIC_EVENT_NO_UPDATE; update_networkstatus_downloads(now); - return networkstatus_dl_check_interval(options); + return networkstatus_dl_check_interval; } static int diff --git a/src/or/networkstatus.c b/src/or/networkstatus.c index 71a2c0f121..1d5b2f2723 100644 --- a/src/or/networkstatus.c +++ b/src/or/networkstatus.c @@ -85,8 +85,30 @@ static time_t time_to_download_next_consensus[N_CONSENSUS_FLAVORS]; /** Download status for the current consensus networkstatus. */ static download_status_t consensus_dl_status[N_CONSENSUS_FLAVORS] = { - { 0, 0, DL_SCHED_CONSENSUS }, - { 0, 0, DL_SCHED_CONSENSUS }, + { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_FAILURE }, + { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_FAILURE }, + }; + +#define N_CONSENSUS_BOOTSTRAP_SCHEDULES 2 +#define CONSENSUS_BOOTSTRAP_SOURCE_AUTHORITY 0 +#define CONSENSUS_BOOTSTRAP_SOURCE_ANY_DIRSERVER 1 + +/* Using DL_SCHED_INCREMENT_ATTEMPT on these schedules means that + * download_status_increment_failure won't increment these entries. + * However, any bootstrap connection failures that occur after we have + * a valid consensus will count against the failure counts on the non-bootstrap + * schedules. There should only be one of these, as all the others will have + * been cancelled. (This doesn't seem to be a significant issue.) */ +static download_status_t + consensus_bootstrap_dl_status[N_CONSENSUS_BOOTSTRAP_SCHEDULES] = + { + { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_AUTHORITY, + DL_SCHED_INCREMENT_ATTEMPT }, + /* During bootstrap, DL_WANT_ANY_DIRSERVER means "use fallbacks". */ + { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_ATTEMPT }, }; /** True iff we have logged a warning about this OR's version being older than @@ -97,6 +119,10 @@ static int have_warned_about_old_version = 0; static int have_warned_about_new_version = 0; static void routerstatus_list_update_named_server_map(void); +static void update_consensus_bootstrap_multiple_downloads( + time_t now, + const or_options_t *options, + int we_are_bootstrapping); /** Forget that we've warned about anything networkstatus-related, so we will * give fresh warnings if the same behavior happens again. */ @@ -122,6 +148,9 @@ networkstatus_reset_download_failures(void) for (i=0; i < N_CONSENSUS_FLAVORS; ++i) download_status_reset(&consensus_dl_status[i]); + + for (i=0; i < N_CONSENSUS_BOOTSTRAP_SCHEDULES; ++i) + download_status_reset(&consensus_bootstrap_dl_status[i]); } /** Read every cached v3 consensus networkstatus from the disk. */ @@ -734,6 +763,55 @@ we_want_to_fetch_flavor(const or_options_t *options, int flavor) * fetching certs before we check whether there is a better one? */ #define DELAY_WHILE_FETCHING_CERTS (20*60) +/* Check if a downloaded consensus flavor should still wait for certificates + * to download now. + * If so, return 1. If not, fail dls and return 0. */ +static int +check_consensus_waiting_for_certs(int flavor, time_t now, + download_status_t *dls) +{ + consensus_waiting_for_certs_t *waiting; + + /* We should always have a known flavor, because we_want_to_fetch_flavor() + * filters out unknown flavors. */ + tor_assert(flavor >= 0 && flavor < N_CONSENSUS_FLAVORS); + + waiting = &consensus_waiting_for_certs[flavor]; + if (waiting->consensus) { + /* XXXX make sure this doesn't delay sane downloads. */ + if (waiting->set_at + DELAY_WHILE_FETCHING_CERTS > now) { + return 1; + } else { + if (!waiting->dl_failed) { + download_status_failed(dls, 0); + waiting->dl_failed=1; + } + } + } + + return 0; +} + +/* Return the maximum download tries for a consensus, based on options and + * whether we_are_bootstrapping. */ +static int +consensus_max_download_tries(const or_options_t *options, + int we_are_bootstrapping) +{ + int use_fallbacks = networkstatus_consensus_can_use_extra_fallbacks(options); + + if (we_are_bootstrapping) { + if (use_fallbacks) { + return options->TestingClientBootstrapConsensusMaxDownloadTries; + } else { + return + options->TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries; + } + } + + return options->TestingConsensusMaxDownloadTries; +} + /** If we want to download a fresh consensus, launch a new download as * appropriate. */ static void @@ -741,12 +819,19 @@ update_consensus_networkstatus_downloads(time_t now) { int i; const or_options_t *options = get_options(); + const int we_are_bootstrapping = networkstatus_consensus_is_boostrapping( + now); + const int use_multi_conn = + networkstatus_consensus_can_use_multiple_directories(options); + + if (should_delay_dir_fetches(options, NULL)) + return; for (i=0; i < N_CONSENSUS_FLAVORS; ++i) { /* XXXX need some way to download unknown flavors if we are caching. */ const char *resource; - consensus_waiting_for_certs_t *waiting; networkstatus_t *c; + int max_in_progress_conns = 1; if (! we_want_to_fetch_flavor(options, i)) continue; @@ -762,35 +847,166 @@ update_consensus_networkstatus_downloads(time_t now) resource = networkstatus_get_flavor_name(i); - /* Let's make sure we remembered to update consensus_dl_status */ - tor_assert(consensus_dl_status[i].schedule == DL_SCHED_CONSENSUS); - - if (!download_status_is_ready(&consensus_dl_status[i], now, - options->TestingConsensusMaxDownloadTries)) - continue; /* We failed downloading a consensus too recently. */ - if (connection_dir_get_by_purpose_and_resource( - DIR_PURPOSE_FETCH_CONSENSUS, resource)) - continue; /* There's an in-progress download.*/ - - waiting = &consensus_waiting_for_certs[i]; - if (waiting->consensus) { - /* XXXX make sure this doesn't delay sane downloads. */ - if (waiting->set_at + DELAY_WHILE_FETCHING_CERTS > now) { - continue; /* We're still getting certs for this one. */ - } else { - if (!waiting->dl_failed) { - download_status_failed(&consensus_dl_status[i], 0); - waiting->dl_failed=1; - } - } + /* Check if we already have enough connections in progress */ + if (we_are_bootstrapping) { + max_in_progress_conns = + options->TestingClientBootstrapConsensusMaxInProgressTries; + } + if (connection_dir_count_by_purpose_and_resource( + DIR_PURPOSE_FETCH_CONSENSUS, + resource) + >= max_in_progress_conns) { + continue; } - log_info(LD_DIR, "Launching %s networkstatus consensus download.", - networkstatus_get_flavor_name(i)); + /* Check if we want to launch another download for a usable consensus. + * Only used during bootstrap. */ + if (we_are_bootstrapping && use_multi_conn + && i == usable_consensus_flavor()) { + + /* Check if we're already downloading a usable consensus */ + int consens_conn_count = + connection_dir_count_by_purpose_and_resource( + DIR_PURPOSE_FETCH_CONSENSUS, + resource); + int connect_consens_conn_count = + connection_dir_count_by_purpose_resource_and_state( + DIR_PURPOSE_FETCH_CONSENSUS, + resource, + DIR_CONN_STATE_CONNECTING); + + if (i == usable_consensus_flavor() + && connect_consens_conn_count < consens_conn_count) { + continue; + } + + /* Make multiple connections for a bootstrap consensus download */ + update_consensus_bootstrap_multiple_downloads(now, options, + we_are_bootstrapping); + } else { + /* Check if we failed downloading a consensus too recently */ + int max_dl_tries = consensus_max_download_tries(options, + we_are_bootstrapping); + + /* Let's make sure we remembered to update consensus_dl_status */ + tor_assert(consensus_dl_status[i].schedule == DL_SCHED_CONSENSUS); + + if (!download_status_is_ready(&consensus_dl_status[i], + now, + max_dl_tries)) { + continue; + } + + /* Check if we're waiting for certificates to download */ + if (check_consensus_waiting_for_certs(i, now, &consensus_dl_status[i])) + continue; + + /* Try the requested attempt */ + log_info(LD_DIR, "Launching %s standard networkstatus consensus " + "download.", networkstatus_get_flavor_name(i)); + directory_get_from_dirserver(DIR_PURPOSE_FETCH_CONSENSUS, + ROUTER_PURPOSE_GENERAL, resource, + PDS_RETRY_IF_NO_SERVERS, + consensus_dl_status[i].want_authority); + } + } +} + +/** When we're bootstrapping, launch one or more consensus download + * connections, if schedule indicates connection(s) should be made after now. + * If is_authority, connect to an authority, otherwise, use a fallback + * directory mirror. + */ +static void +update_consensus_bootstrap_attempt_downloads( + time_t now, + const or_options_t *options, + int we_are_bootstrapping, + download_status_t *dls, + download_want_authority_t want_authority) +{ + int max_dl_tries = consensus_max_download_tries(options, + we_are_bootstrapping); + const char *resource = networkstatus_get_flavor_name( + usable_consensus_flavor()); + + /* Let's make sure we remembered to update schedule */ + tor_assert(dls->schedule == DL_SCHED_CONSENSUS); + + /* Allow for multiple connections in the same second, if the schedule value + * is 0. */ + while (download_status_is_ready(dls, now, max_dl_tries)) { + log_info(LD_DIR, "Launching %s bootstrap %s networkstatus consensus " + "download.", resource, (want_authority == DL_WANT_AUTHORITY + ? "authority" + : "mirror")); directory_get_from_dirserver(DIR_PURPOSE_FETCH_CONSENSUS, ROUTER_PURPOSE_GENERAL, resource, - PDS_RETRY_IF_NO_SERVERS); + PDS_RETRY_IF_NO_SERVERS, want_authority); + /* schedule the next attempt */ + download_status_increment_attempt(dls, resource, now); + } +} + +/** If we're bootstrapping, check the connection schedules and see if we want + * to make additional, potentially concurrent, consensus download + * connections. + * Only call when bootstrapping, and when we want to make additional + * connections. Only nodes that satisfy + * networkstatus_consensus_can_use_multiple_directories make additonal + * connections. + */ +static void +update_consensus_bootstrap_multiple_downloads(time_t now, + const or_options_t *options, + int we_are_bootstrapping) +{ + const int usable_flavor = usable_consensus_flavor(); + + /* make sure we can use multiple connections */ + if (!networkstatus_consensus_can_use_multiple_directories(options)) { + return; + } + + /* If we've managed to validate a usable consensus, don't make additonal + * connections. */ + if (!we_are_bootstrapping) { + return; + } + + /* Launch concurrent consensus download attempt(s) based on the mirror and + * authority schedules. Try the mirror first - this makes it slightly more + * likely that we'll connect to the fallback first, and then end the + * authority connection attempt. */ + + /* If a consensus download fails because it's waiting for certificates, + * we'll fail both the authority and fallback schedules. This is better than + * failing only one of the schedules, and having the other continue + * unchecked. + */ + + /* If we don't have or can't use extra fallbacks, don't try them. */ + if (networkstatus_consensus_can_use_extra_fallbacks(options)) { + download_status_t *dls_f = + &consensus_bootstrap_dl_status[CONSENSUS_BOOTSTRAP_SOURCE_ANY_DIRSERVER]; + + if (!check_consensus_waiting_for_certs(usable_flavor, now, dls_f)) { + /* During bootstrap, DL_WANT_ANY_DIRSERVER means "use fallbacks". */ + update_consensus_bootstrap_attempt_downloads(now, options, + we_are_bootstrapping, dls_f, + DL_WANT_ANY_DIRSERVER); + } + } + + /* Now try an authority. */ + download_status_t *dls_a = + &consensus_bootstrap_dl_status[CONSENSUS_BOOTSTRAP_SOURCE_AUTHORITY]; + + if (!check_consensus_waiting_for_certs(usable_flavor, now, dls_a)) { + update_consensus_bootstrap_attempt_downloads(now, options, + we_are_bootstrapping, dls_a, + DL_WANT_AUTHORITY); } } @@ -1057,6 +1273,66 @@ networkstatus_get_reasonably_live_consensus(time_t now, int flavor) return NULL; } +/** Check if we're bootstrapping a consensus download. This means that we are + * only using the authorities and fallback directory mirrors to download the + * consensus flavour we'll use. */ +int +networkstatus_consensus_is_boostrapping(time_t now) +{ + /* If we don't have a consensus, we must still be bootstrapping */ + return !networkstatus_get_reasonably_live_consensus( + now, + usable_consensus_flavor()); +} + +/** Check if we can use multiple directories for a consensus download. + * Only clients (including bridges, but excluding bridge clients) benefit + * from multiple simultaneous consensus downloads. */ +int +networkstatus_consensus_can_use_multiple_directories( + const or_options_t *options) +{ + /* If we are a client, bridge, bridge client, or hidden service */ + return (!directory_fetches_from_authorities(options)); +} + +/** Check if we can use fallback directory mirrors for a consensus download. + * Only clients that have a list of additional fallbacks can use fallbacks. */ +int +networkstatus_consensus_can_use_extra_fallbacks(const or_options_t *options) +{ + /* If we are a client, and we have additional mirrors, we can use them. + * The list length comparisons are a quick way to check if we have any + * non-authority fallback directories. If we ever have any authorities that + * aren't fallback directories, we will need to change this code. */ + return (!directory_fetches_from_authorities(options) + && (smartlist_len(router_get_fallback_dir_servers()) + > smartlist_len(router_get_trusted_dir_servers()))); +} + +/* Is tor currently downloading a consensus of the usable flavor? */ +int +networkstatus_consensus_is_downloading_usable_flavor(void) +{ + const char *usable_resource = networkstatus_get_flavor_name( + usable_consensus_flavor()); + const int consens_conn_usable_count = + connection_dir_count_by_purpose_and_resource( + DIR_PURPOSE_FETCH_CONSENSUS, + usable_resource); + + const int connect_consens_conn_usable_count = + connection_dir_count_by_purpose_resource_and_state( + DIR_PURPOSE_FETCH_CONSENSUS, + usable_resource, + DIR_CONN_STATE_CONNECTING); + if (connect_consens_conn_usable_count < consens_conn_usable_count) { + return 1; + } + + return 0; +} + /** Given two router status entries for the same router identity, return 1 if * if the contents have changed between them. Otherwise, return 0. */ static int diff --git a/src/or/networkstatus.h b/src/or/networkstatus.h index d6e9e37013..d44022c80c 100644 --- a/src/or/networkstatus.h +++ b/src/or/networkstatus.h @@ -70,6 +70,13 @@ MOCK_DECL(networkstatus_t *,networkstatus_get_latest_consensus_by_flavor, networkstatus_t *networkstatus_get_live_consensus(time_t now); networkstatus_t *networkstatus_get_reasonably_live_consensus(time_t now, int flavor); +int networkstatus_consensus_is_boostrapping(time_t now); +int networkstatus_consensus_can_use_multiple_directories( + const or_options_t *options); +int networkstatus_consensus_can_use_extra_fallbacks( + const or_options_t *options); +int networkstatus_consensus_is_downloading_usable_flavor(void); + #define NSSET_FROM_CACHE 1 #define NSSET_WAS_WAITING_FOR_CERTS 2 #define NSSET_DONT_DOWNLOAD_CERTS 4 diff --git a/src/or/or.h b/src/or/or.h index c5596e3a58..850a6f9390 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1946,8 +1946,8 @@ typedef enum { } saved_location_t; #define saved_location_bitfield_t ENUM_BF(saved_location_t) -/** Enumeration: what kind of download schedule are we using for a given - * object? */ +/** Enumeration: what directory object is being downloaded? + * This determines which schedule is selected to perform the download. */ typedef enum { DL_SCHED_GENERIC = 0, DL_SCHED_CONSENSUS = 1, @@ -1955,24 +1955,74 @@ typedef enum { } download_schedule_t; #define download_schedule_bitfield_t ENUM_BF(download_schedule_t) -/** Enumeration: do we want to try an authority or a fallback directory - * mirror for our download? */ +/** Enumeration: is the download schedule for downloading from an authority, + * or from any available directory mirror? + * During bootstrap, "any" means a fallback (or an authority, if there + * are no fallbacks). + * When we have a valid consensus, "any" means any directory server. */ typedef enum { - DL_WANT_FALLBACK = 0, + DL_WANT_ANY_DIRSERVER = 0, DL_WANT_AUTHORITY = 1, } download_want_authority_t; #define download_want_authority_bitfield_t \ ENUM_BF(download_want_authority_t) -/** Information about our plans for retrying downloads for a downloadable - * object. */ -typedef struct download_status_t { - time_t next_attempt_at; /**< When should we try downloading this descriptor - * again? */ - uint8_t n_download_failures; /**< Number of failures trying to download the - * most recent descriptor. */ - download_schedule_bitfield_t schedule : 8; +/** Enumeration: do we want to increment the schedule position each time a + * connection is attempted (these attempts can be concurrent), or do we want + * to increment the schedule position after a connection fails? */ +typedef enum { + DL_SCHED_INCREMENT_FAILURE = 0, + DL_SCHED_INCREMENT_ATTEMPT = 1, +} download_schedule_increment_t; +#define download_schedule_increment_bitfield_t \ + ENUM_BF(download_schedule_increment_t) +/** Information about our plans for retrying downloads for a downloadable + * directory object. + * Each type of downloadable directory object has a corresponding retry + * schedule, which can be different depending on whether the object is + * being downloaded from an authority or a mirror (want_authority). + * next_attempt_at contains the next time we will attempt to download + * the object. + * For schedules that increment_on failure, n_download_failures + * is used to determine the position in the schedule. (Each schedule is a + * smartlist of integer delays, parsed from a CSV option.) Every time a + * connection attempt fails, n_download_failures is incremented, + * the new delay value is looked up from the schedule, and + * next_attempt_at is set delay seconds from the time the previous + * connection failed. Therefore, at most one failure-based connection can be + * in progress for each download_status_t. + * For schedules that increment_on attempt, n_download_attempts + * is used to determine the position in the schedule. Every time a + * connection attempt is made, n_download_attempts is incremented, + * the new delay value is looked up from the schedule, and + * next_attempt_at is set delay seconds from the time the previous + * connection was attempted. Therefore, multiple concurrent attempted-based + * connections can be in progress for each download_status_t. + * After an object is successfully downloaded, any other concurrent connections + * are terminated. A new schedule which starts at position 0 is used for + * subsequent downloads of the same object. + */ +typedef struct download_status_t { + time_t next_attempt_at; /**< When should we try downloading this object + * again? */ + uint8_t n_download_failures; /**< Number of failed downloads of the most + * recent object, since the last success. */ + uint8_t n_download_attempts; /**< Number of (potentially concurrent) attempts + * to download the most recent object, since + * the last success. */ + download_schedule_bitfield_t schedule : 8; /**< What kind of object is being + * downloaded? This determines the + * schedule used for the download. + */ + download_want_authority_bitfield_t want_authority : 1; /**< Is the download + * happening from an authority + * or a mirror? This determines + * the schedule used for the + * download. */ + download_schedule_increment_bitfield_t increment_on : 1; /**< does this + * schedule increment on each attempt, + * or after each failure? */ } download_status_t; /** If n_download_failures is this high, the download can never happen. */ @@ -4078,6 +4128,36 @@ typedef struct { * on testing networks. */ smartlist_t *TestingClientConsensusDownloadSchedule; + /** Schedule for when clients should download consensuses from authorities + * if they are bootstrapping (that is, they don't have a usable, reasonably + * live consensus). Only used by clients fetching from a list of fallback + * directory mirrors. + * + * This schedule is incremented by (potentially concurrent) connection + * attempts, unlike other schedules, which are incremented by connection + * failures. Only altered on testing networks. */ + smartlist_t *TestingClientBootstrapConsensusAuthorityDownloadSchedule; + + /** Schedule for when clients should download consensuses from fallback + * directory mirrors if they are bootstrapping (that is, they don't have a + * usable, reasonably live consensus). Only used by clients fetching from a + * list of fallback directory mirrors. + * + * This schedule is incremented by (potentially concurrent) connection + * attempts, unlike other schedules, which are incremented by connection + * failures. Only altered on testing networks. */ + smartlist_t *TestingClientBootstrapConsensusFallbackDownloadSchedule; + + /** Schedule for when clients should download consensuses from authorities + * if they are bootstrapping (that is, they don't have a usable, reasonably + * live consensus). Only used by clients which don't have or won't fetch + * from a list of fallback directory mirrors. + * + * This schedule is incremented by (potentially concurrent) connection + * attempts, unlike other schedules, which are incremented by connection + * failures. Only altered on testing networks. */ + smartlist_t *TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule; + /** Schedule for when clients should download bridge descriptors. Only * altered on testing networks. */ smartlist_t *TestingBridgeDownloadSchedule; @@ -4095,6 +4175,21 @@ typedef struct { * up? Only altered on testing networks. */ int TestingConsensusMaxDownloadTries; + /** How many times will a client try to fetch a consensus while + * bootstrapping using a list of fallback directories, before it gives up? + * Only altered on testing networks. */ + int TestingClientBootstrapConsensusMaxDownloadTries; + + /** How many times will a client try to fetch a consensus while + * bootstrapping using only a list of authorities, before it gives up? + * Only altered on testing networks. */ + int TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries; + + /** How many simultaneous in-progress connections will we make when trying + * to fetch a consensus before we wait for one to complete, timeout, or + * error out? Only altered on testing networks. */ + int TestingClientBootstrapConsensusMaxInProgressTries; + /** How many times will we try to download a router's descriptor before * giving up? Only altered on testing networks. */ int TestingDescriptorMaxDownloadTries; diff --git a/src/or/routerlist.c b/src/or/routerlist.c index ca5105807e..0027a04cb5 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -900,7 +900,7 @@ authority_certs_fetch_missing(networkstatus_t *status, time_t now) /* XXX - do we want certs from authorities or mirrors? - teor */ directory_get_from_dirserver(DIR_PURPOSE_FETCH_CERTIFICATE, 0, resource, PDS_RETRY_IF_NO_SERVERS, - DL_WANT_FALLBACK); + DL_WANT_ANY_DIRSERVER); tor_free(resource); } /* else we didn't add any: they were all pending */ @@ -946,7 +946,7 @@ authority_certs_fetch_missing(networkstatus_t *status, time_t now) /* XXX - do we want certs from authorities or mirrors? - teor */ directory_get_from_dirserver(DIR_PURPOSE_FETCH_CERTIFICATE, 0, resource, PDS_RETRY_IF_NO_SERVERS, - DL_WANT_FALLBACK); + DL_WANT_ANY_DIRSERVER); tor_free(resource); } /* else they were all pending */ @@ -4380,14 +4380,14 @@ MOCK_IMPL(STATIC void, initiate_descriptor_downloads, tor_free(cp); if (source) { - /* We know which authority we want. */ + /* We know which authority or directory mirror we want. */ directory_initiate_command_routerstatus(source, purpose, ROUTER_PURPOSE_GENERAL, DIRIND_ONEHOP, resource, NULL, 0, 0); } else { directory_get_from_dirserver(purpose, ROUTER_PURPOSE_GENERAL, resource, - pds_flags, DL_WANT_FALLBACK); + pds_flags, DL_WANT_ANY_DIRSERVER); } tor_free(resource); } @@ -4669,9 +4669,14 @@ launch_dummy_descriptor_download_as_needed(time_t now, last_descriptor_download_attempted + DUMMY_DOWNLOAD_INTERVAL < now && last_dummy_download + DUMMY_DOWNLOAD_INTERVAL < now) { last_dummy_download = now; + /* XX/teor - do we want an authority here, because they are less likely + * to give us the wrong address? (See #17782) + * I'm leaving the previous behaviour intact, because I don't like + * the idea of some relays contacting an authority every 20 minutes. */ directory_get_from_dirserver(DIR_PURPOSE_FETCH_SERVERDESC, ROUTER_PURPOSE_GENERAL, "authority.z", - PDS_RETRY_IF_NO_SERVERS, DL_WANT_FALLBACK); + PDS_RETRY_IF_NO_SERVERS, + DL_WANT_ANY_DIRSERVER); } } diff --git a/src/test/test_dir.c b/src/test/test_dir.c index 855746e749..ce639b644f 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -3494,6 +3494,435 @@ test_dir_packages(void *arg) tor_free(res); } +static void +test_dir_download_status_schedule(void *arg) +{ + (void)arg; + download_status_t dls_failure = { 0, 0, 0, DL_SCHED_GENERIC, + DL_WANT_AUTHORITY, + DL_SCHED_INCREMENT_FAILURE }; + download_status_t dls_attempt = { 0, 0, 0, DL_SCHED_CONSENSUS, + DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_ATTEMPT}; + download_status_t dls_bridge = { 0, 0, 0, DL_SCHED_BRIDGE, + DL_WANT_AUTHORITY, + DL_SCHED_INCREMENT_FAILURE}; + int increment = -1; + int expected_increment = -1; + time_t current_time = time(NULL); + int delay1 = -1; + int delay2 = -1; + smartlist_t *schedule = smartlist_new(); + + /* Make a dummy schedule */ + smartlist_add(schedule, (void *)&delay1); + smartlist_add(schedule, (void *)&delay2); + + /* check a range of values */ + delay1 = 1000; + increment = download_status_schedule_get_delay(&dls_failure, + schedule, + TIME_MIN); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_failure.next_attempt_at == TIME_MIN + expected_increment); + +#if TIME_T_IS_SIGNED + delay1 = INT_MAX; + increment = download_status_schedule_get_delay(&dls_failure, + schedule, + -1); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_failure.next_attempt_at == TIME_MAX); +#endif + + delay1 = 0; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + 0); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == 0 + expected_increment); + + delay1 = 1000; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + 1); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == 1 + expected_increment); + + delay1 = INT_MAX; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + current_time); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == TIME_MAX); + + delay1 = 1; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + TIME_MAX); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == TIME_MAX); + + /* see what happens when we reach the end */ + dls_attempt.n_download_attempts++; + dls_bridge.n_download_failures++; + + delay2 = 100; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + current_time); + expected_increment = delay2; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == current_time + delay2); + + delay2 = 1; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + current_time); + expected_increment = delay2; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == current_time + delay2); + + /* see what happens when we try to go off the end */ + dls_attempt.n_download_attempts++; + dls_bridge.n_download_failures++; + + delay2 = 5; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + current_time); + expected_increment = delay2; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == current_time + delay2); + + delay2 = 17; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + current_time); + expected_increment = delay2; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == current_time + delay2); + + /* see what happens when we reach IMPOSSIBLE_TO_DOWNLOAD */ + dls_attempt.n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD; + dls_bridge.n_download_failures = IMPOSSIBLE_TO_DOWNLOAD; + + delay2 = 35; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + current_time); + expected_increment = INT_MAX; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == TIME_MAX); + + delay2 = 99; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + current_time); + expected_increment = INT_MAX; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == TIME_MAX); + + done: + /* the pointers in schedule are allocated on the stack */ + smartlist_free(schedule); +} + +static void +test_dir_download_status_increment(void *arg) +{ + (void)arg; + download_status_t dls_failure = { 0, 0, 0, DL_SCHED_GENERIC, + DL_WANT_AUTHORITY, + DL_SCHED_INCREMENT_FAILURE }; + download_status_t dls_attempt = { 0, 0, 0, DL_SCHED_BRIDGE, + DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_ATTEMPT}; + int delay0 = -1; + int delay1 = -1; + int delay2 = -1; + smartlist_t *schedule = smartlist_new(); + or_options_t test_options; + time_t next_at = TIME_MAX; + time_t current_time = time(NULL); + + /* Provide some values for the schedule */ + delay0 = 10; + delay1 = 99; + delay2 = 20; + + /* Make the schedule */ + smartlist_add(schedule, (void *)&delay0); + smartlist_add(schedule, (void *)&delay1); + smartlist_add(schedule, (void *)&delay2); + + /* Put it in the options */ + mock_options = &test_options; + reset_options(mock_options, &mock_get_options_calls); + mock_options->TestingClientDownloadSchedule = schedule; + mock_options->TestingBridgeDownloadSchedule = schedule; + + MOCK(get_options, mock_get_options); + + /* Check that a failure reset works */ + mock_get_options_calls = 0; + download_status_reset(&dls_failure); + /* we really want to test that it's equal to time(NULL) + delay0, but that's + * an unrealiable test, because time(NULL) might change. */ + tt_assert(download_status_get_next_attempt_at(&dls_failure) + >= current_time + delay0); + tt_assert(download_status_get_next_attempt_at(&dls_failure) + != TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_failure) == 0); + tt_assert(download_status_get_n_attempts(&dls_failure) == 0); + tt_assert(mock_get_options_calls >= 1); + + /* avoid timing inconsistencies */ + dls_failure.next_attempt_at = current_time + delay0; + + /* check that a reset schedule becomes ready at the right time */ + tt_assert(download_status_is_ready(&dls_failure, + current_time + delay0 - 1, + 1) == 0); + tt_assert(download_status_is_ready(&dls_failure, + current_time + delay0, + 1) == 1); + tt_assert(download_status_is_ready(&dls_failure, + current_time + delay0 + 1, + 1) == 1); + + /* Check that a failure increment works */ + mock_get_options_calls = 0; + next_at = download_status_increment_failure(&dls_failure, 404, "test", 0, + current_time); + tt_assert(next_at == current_time + delay1); + tt_assert(download_status_get_n_failures(&dls_failure) == 1); + tt_assert(download_status_get_n_attempts(&dls_failure) == 1); + tt_assert(mock_get_options_calls >= 1); + + /* check that an incremented schedule becomes ready at the right time */ + tt_assert(download_status_is_ready(&dls_failure, + current_time + delay1 - 1, + 1) == 0); + tt_assert(download_status_is_ready(&dls_failure, + current_time + delay1, + 1) == 1); + tt_assert(download_status_is_ready(&dls_failure, + current_time + delay1 + 1, + 1) == 1); + + /* check that a schedule isn't ready if it's had too many failures */ + tt_assert(download_status_is_ready(&dls_failure, + current_time + delay1 + 10, + 0) == 0); + + /* Check that failure increments don't happen on 503 for clients, but that + * attempt increments do. */ + mock_get_options_calls = 0; + next_at = download_status_increment_failure(&dls_failure, 503, "test", 0, + current_time); + tt_assert(next_at == current_time + delay1); + tt_assert(download_status_get_n_failures(&dls_failure) == 1); + tt_assert(download_status_get_n_attempts(&dls_failure) == 2); + tt_assert(mock_get_options_calls >= 1); + + /* Check that failure increments do happen on 503 for servers */ + mock_get_options_calls = 0; + next_at = download_status_increment_failure(&dls_failure, 503, "test", 1, + current_time); + tt_assert(next_at == current_time + delay2); + tt_assert(download_status_get_n_failures(&dls_failure) == 2); + tt_assert(download_status_get_n_attempts(&dls_failure) == 3); + tt_assert(mock_get_options_calls >= 1); + + /* Check what happens when we run off the end of the schedule */ + mock_get_options_calls = 0; + next_at = download_status_increment_failure(&dls_failure, 404, "test", 0, + current_time); + tt_assert(next_at == current_time + delay2); + tt_assert(download_status_get_n_failures(&dls_failure) == 3); + tt_assert(download_status_get_n_attempts(&dls_failure) == 4); + tt_assert(mock_get_options_calls >= 1); + + /* Check what happens when we hit the failure limit */ + mock_get_options_calls = 0; + download_status_mark_impossible(&dls_failure); + next_at = download_status_increment_failure(&dls_failure, 404, "test", 0, + current_time); + tt_assert(next_at == TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_failure) + == IMPOSSIBLE_TO_DOWNLOAD); + tt_assert(download_status_get_n_attempts(&dls_failure) + == IMPOSSIBLE_TO_DOWNLOAD); + tt_assert(mock_get_options_calls >= 1); + + /* Check that a failure reset doesn't reset at the limit */ + mock_get_options_calls = 0; + download_status_reset(&dls_failure); + tt_assert(download_status_get_next_attempt_at(&dls_failure) + == TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_failure) + == IMPOSSIBLE_TO_DOWNLOAD); + tt_assert(download_status_get_n_attempts(&dls_failure) + == IMPOSSIBLE_TO_DOWNLOAD); + tt_assert(mock_get_options_calls == 0); + + /* Check that a failure reset resets just before the limit */ + mock_get_options_calls = 0; + dls_failure.n_download_failures = IMPOSSIBLE_TO_DOWNLOAD - 1; + dls_failure.n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD - 1; + download_status_reset(&dls_failure); + /* we really want to test that it's equal to time(NULL) + delay0, but that's + * an unrealiable test, because time(NULL) might change. */ + tt_assert(download_status_get_next_attempt_at(&dls_failure) + >= current_time + delay0); + tt_assert(download_status_get_next_attempt_at(&dls_failure) + != TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_failure) == 0); + tt_assert(download_status_get_n_attempts(&dls_failure) == 0); + tt_assert(mock_get_options_calls >= 1); + + /* Check that failure increments do happen on attempt-based schedules, + * but that the retry is set at the end of time */ + mock_get_options_calls = 0; + next_at = download_status_increment_failure(&dls_attempt, 404, "test", 0, + current_time); + tt_assert(next_at == TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_attempt) == 1); + tt_assert(download_status_get_n_attempts(&dls_attempt) == 0); + tt_assert(mock_get_options_calls == 0); + + /* Check that an attempt reset works */ + mock_get_options_calls = 0; + download_status_reset(&dls_attempt); + /* we really want to test that it's equal to time(NULL) + delay0, but that's + * an unrealiable test, because time(NULL) might change. */ + tt_assert(download_status_get_next_attempt_at(&dls_attempt) + >= current_time + delay0); + tt_assert(download_status_get_next_attempt_at(&dls_attempt) + != TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_attempt) == 0); + tt_assert(download_status_get_n_attempts(&dls_attempt) == 0); + tt_assert(mock_get_options_calls >= 1); + + /* avoid timing inconsistencies */ + dls_attempt.next_attempt_at = current_time + delay0; + + /* check that a reset schedule becomes ready at the right time */ + tt_assert(download_status_is_ready(&dls_attempt, + current_time + delay0 - 1, + 1) == 0); + tt_assert(download_status_is_ready(&dls_attempt, + current_time + delay0, + 1) == 1); + tt_assert(download_status_is_ready(&dls_attempt, + current_time + delay0 + 1, + 1) == 1); + + /* Check that an attempt increment works */ + mock_get_options_calls = 0; + next_at = download_status_increment_attempt(&dls_attempt, "test", + current_time); + tt_assert(next_at == current_time + delay1); + tt_assert(download_status_get_n_failures(&dls_attempt) == 0); + tt_assert(download_status_get_n_attempts(&dls_attempt) == 1); + tt_assert(mock_get_options_calls >= 1); + + /* check that an incremented schedule becomes ready at the right time */ + tt_assert(download_status_is_ready(&dls_attempt, + current_time + delay1 - 1, + 1) == 0); + tt_assert(download_status_is_ready(&dls_attempt, + current_time + delay1, + 1) == 1); + tt_assert(download_status_is_ready(&dls_attempt, + current_time + delay1 + 1, + 1) == 1); + + /* check that a schedule isn't ready if it's had too many attempts */ + tt_assert(download_status_is_ready(&dls_attempt, + current_time + delay1 + 10, + 0) == 0); + + /* Check what happens when we reach then run off the end of the schedule */ + mock_get_options_calls = 0; + next_at = download_status_increment_attempt(&dls_attempt, "test", + current_time); + tt_assert(next_at == current_time + delay2); + tt_assert(download_status_get_n_failures(&dls_attempt) == 0); + tt_assert(download_status_get_n_attempts(&dls_attempt) == 2); + tt_assert(mock_get_options_calls >= 1); + + mock_get_options_calls = 0; + next_at = download_status_increment_attempt(&dls_attempt, "test", + current_time); + tt_assert(next_at == current_time + delay2); + tt_assert(download_status_get_n_failures(&dls_attempt) == 0); + tt_assert(download_status_get_n_attempts(&dls_attempt) == 3); + tt_assert(mock_get_options_calls >= 1); + + /* Check what happens when we hit the attempt limit */ + mock_get_options_calls = 0; + download_status_mark_impossible(&dls_attempt); + next_at = download_status_increment_attempt(&dls_attempt, "test", + current_time); + tt_assert(next_at == TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_attempt) + == IMPOSSIBLE_TO_DOWNLOAD); + tt_assert(download_status_get_n_attempts(&dls_attempt) + == IMPOSSIBLE_TO_DOWNLOAD); + tt_assert(mock_get_options_calls >= 1); + + /* Check that an attempt reset doesn't reset at the limit */ + mock_get_options_calls = 0; + download_status_reset(&dls_attempt); + tt_assert(download_status_get_next_attempt_at(&dls_attempt) + == TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_attempt) + == IMPOSSIBLE_TO_DOWNLOAD); + tt_assert(download_status_get_n_attempts(&dls_attempt) + == IMPOSSIBLE_TO_DOWNLOAD); + tt_assert(mock_get_options_calls == 0); + + /* Check that an attempt reset resets just before the limit */ + mock_get_options_calls = 0; + dls_attempt.n_download_failures = IMPOSSIBLE_TO_DOWNLOAD - 1; + dls_attempt.n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD - 1; + download_status_reset(&dls_attempt); + /* we really want to test that it's equal to time(NULL) + delay0, but that's + * an unrealiable test, because time(NULL) might change. */ + tt_assert(download_status_get_next_attempt_at(&dls_attempt) + >= current_time + delay0); + tt_assert(download_status_get_next_attempt_at(&dls_attempt) + != TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_attempt) == 0); + tt_assert(download_status_get_n_attempts(&dls_attempt) == 0); + tt_assert(mock_get_options_calls >= 1); + + /* Check that attempt increments don't happen on failure-based schedules, + * and that the attempt is set at the end of time */ + mock_get_options_calls = 0; + next_at = download_status_increment_attempt(&dls_failure, "test", + current_time); + tt_assert(next_at == TIME_MAX); + tt_assert(download_status_get_n_failures(&dls_failure) == 0); + tt_assert(download_status_get_n_attempts(&dls_failure) == 0); + tt_assert(mock_get_options_calls == 0); + + done: + /* the pointers in schedule are allocated on the stack */ + smartlist_free(schedule); + UNMOCK(get_options); + mock_options = NULL; + mock_get_options_calls = 0; +} + #define DIR_LEGACY(name) \ { #name, test_dir_ ## name , TT_FORK, NULL, NULL } @@ -3525,6 +3954,8 @@ struct testcase_t dir_tests[] = { DIR(purpose_needs_anonymity, 0), DIR(fetch_type, 0), DIR(packages, 0), + DIR(download_status_schedule, 0), + DIR(download_status_increment, 0), END_OF_TESTCASES };