diff --git a/changes/bug4483-multiple-consensus-downloads b/changes/bug4483-multiple-consensus-downloads new file mode 100644 index 0000000000..23d22a89c4 --- /dev/null +++ b/changes/bug4483-multiple-consensus-downloads @@ -0,0 +1,9 @@ + o Major features (consensus downloads): + - Schedule multiple in-progress consensus downloads during client + bootstrap. Use the first one that starts downloading, close the + rest. This reduces failures when authorities are slow or down. + With #15775, it reduces failures due to fallback churn. + Implements #4483 (reduce failures when authorities are down). + Patch by "teor". + Implements IPv4 portions of proposal #210 by "mikeperry" and + "teor". diff --git a/doc/tor.1.txt b/doc/tor.1.txt index 041b000f09..77e4c4e8fe 100644 --- a/doc/tor.1.txt +++ b/doc/tor.1.txt @@ -2281,10 +2281,18 @@ The following options are used for running a testing Tor network. TestingClientDownloadSchedule 0, 0, 5, 10, 15, 20, 30, 60 TestingServerConsensusDownloadSchedule 0, 0, 5, 10, 15, 20, 30, 60 TestingClientConsensusDownloadSchedule 0, 0, 5, 10, 15, 20, 30, 60 + TestingClientBootstrapConsensusAuthorityDownloadSchedule 0, 2, + 4 (for 40 seconds), 8, 16, 32, 60 + TestingClientBootstrapConsensusFallbackDownloadSchedule 0, 1, + 4 (for 40 seconds), 8, 16, 32, 60 + TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule 0, 1, + 4 (for 40 seconds), 8, 16, 32, 60 TestingBridgeDownloadSchedule 60, 30, 30, 60 TestingClientMaxIntervalWithoutRequest 5 seconds TestingDirConnectionMaxStall 30 seconds TestingConsensusMaxDownloadTries 80 + TestingClientBootstrapConsensusMaxDownloadTries 80 + TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries 80 TestingDescriptorMaxDownloadTries 80 TestingMicrodescMaxDownloadTries 80 TestingCertMaxDownloadTries 80 @@ -2345,6 +2353,36 @@ The following options are used for running a testing Tor network. requires that **TestingTorNetwork** is set. (Default: 0, 0, 60, 300, 600, 1800, 3600, 3600, 3600, 10800, 21600, 43200) +[[TestingClientBootstrapConsensusAuthorityDownloadSchedule]] **TestingClientBootstrapConsensusAuthorityDownloadSchedule** __N__,__N__,__...__:: + Schedule for when clients should download consensuses from authorities if + they are bootstrapping (that is, they don't have a usable, reasonably live + consensus). Only used by clients fetching from a list of fallback + directory mirrors. This schedule is advanced by (potentially concurrent) + connection attempts, unlike other schedules, which are advanced by + connection failures. Changing this schedule requires that + **TestingTorNetwork** is set. (Default: 10, 11, 3600, 10800, 25200, 54000, + 111600, 262800) + +[[TestingClientBootstrapConsensusFallbackDownloadSchedule]] **TestingClientBootstrapConsensusFallbackDownloadSchedule** __N__,__N__,__...__:: + Schedule for when clients should download consensuses from fallback + directory mirrors if they are bootstrapping (that is, they don't have a + usable, reasonably live consensus). Only used by clients fetching from a + list of fallback directory mirrors. This schedule is advanced by + (potentially concurrent) connection attempts, unlike other schedules, which + are advanced by connection failures. Changing this schedule requires that + **TestingTorNetwork** is set. 
(Default: 0, 1, 4, 11, 3600, 10800, 25200, + 54000, 111600, 262800) + +[[TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule]] **TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule** __N__,__N__,__...__:: + Schedule for when clients should download consensuses from authorities if + they are bootstrapping (that is, they don't have a usable, reasonably live + consensus). Only used by clients which don't have or won't fetch from a + list of fallback directory mirrors. This schedule is advanced by + (potentially concurrent) connection attempts, unlike other schedules, + which are advanced by connection failures. Changing this schedule requires + that **TestingTorNetwork** is set. (Default: 0, 3, 7, 3600, 10800, 25200, + 54000, 111600, 262800) + [[TestingBridgeDownloadSchedule]] **TestingBridgeDownloadSchedule** __N__,__N__,__...__:: Schedule for when clients should download bridge descriptors. Changing this requires that **TestingTorNetwork** is set. (Default: 3600, 900, 900, 3600) @@ -2361,9 +2399,24 @@ The following options are used for running a testing Tor network. 5 minutes) [[TestingConsensusMaxDownloadTries]] **TestingConsensusMaxDownloadTries** __NUM__:: - Try this often to download a consensus before giving up. Changing + Try this many times to download a consensus before giving up. Changing this requires that **TestingTorNetwork** is set. (Default: 8) +[[TestingClientBootstrapConsensusMaxDownloadTries]] **TestingClientBootstrapConsensusMaxDownloadTries** __NUM__:: + Try this many times to download a consensus while bootstrapping using + fallback directory mirrors before giving up. Changing this requires that + **TestingTorNetwork** is set. (Default: 7) + +[[TestingClientBootstrapConsensusMaxDownloadTries]] **TestingClientBootstrapConsensusMaxDownloadTries** __NUM__:: + Try this many times to download a consensus while bootstrapping using + only authorities before giving up. Changing this requires that + **TestingTorNetwork** is set. (Default: 4) + +[[TestingClientBootstrapConsensusMaxInProgressTries]] **TestingClientBootstrapConsensusMaxInProgressTries** __NUM__:: + Try this many simultaneous connections to download a consensus before + waiting for one to complete, timeout, or error out. Changing this + requires that **TestingTorNetwork** is set. (Default: 4) + [[TestingDescriptorMaxDownloadTries]] **TestingDescriptorMaxDownloadTries** __NUM__:: Try this often to download a server descriptor before giving up. Changing this requires that **TestingTorNetwork** is set. 
(Default: 8) diff --git a/src/common/torint.h b/src/common/torint.h index 6171700898..418fe0fabf 100644 --- a/src/common/torint.h +++ b/src/common/torint.h @@ -336,6 +336,32 @@ typedef uint32_t uintptr_t; #endif /* time_t_is_signed */ #endif /* ifndef(TIME_MAX) */ +#ifndef TIME_MIN + +#ifdef TIME_T_IS_SIGNED + +#if (SIZEOF_TIME_T == SIZEOF_INT) +#define TIME_MIN ((time_t)INT_MIN) +#elif (SIZEOF_TIME_T == SIZEOF_LONG) +#define TIME_MIN ((time_t)LONG_MIN) +#elif (SIZEOF_TIME_T == 8) +#define TIME_MIN ((time_t)INT64_MIN) +#else +#error "Can't define (signed) TIME_MIN" +#endif + +#else +/* Unsigned case */ +#if (SIZEOF_TIME_T == 4) +#define TIME_MIN ((time_t)UINT32_MIN) +#elif (SIZEOF_TIME_T == 8) +#define TIME_MIN ((time_t)UINT64_MIN) +#else +#error "Can't define (unsigned) TIME_MIN" +#endif +#endif /* time_t_is_signed */ +#endif /* ifndef(TIME_MIN) */ + #ifndef SIZE_MAX #if (SIZEOF_SIZE_T == 4) #define SIZE_MAX UINT32_MAX diff --git a/src/or/config.c b/src/or/config.c index 7b42c9fdb3..413667a235 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -475,10 +475,40 @@ static config_var_t option_vars_[] = { V(TestingClientConsensusDownloadSchedule, CSV_INTERVAL, "0, 0, 60, " "300, 600, 1800, 3600, 3600, 3600, " "10800, 21600, 43200"), + /* With the TestingClientBootstrapConsensus*Download* below: + * Clients with only authorities will try: + * - 3 authorities over 10 seconds, then wait 60 minutes. + * Clients with authorities and fallbacks will try: + * - 2 authorities and 4 fallbacks over 21 seconds, then wait 60 minutes. + * Clients will also retry when an application request arrives. + * After a number of failed reqests, clients retry every 3 days + 1 hour. + * + * Clients used to try 2 authorities over 10 seconds, then wait for + * 60 minutes or an application request. + * + * When clients have authorities and fallbacks available, they use these + * schedules: (we stagger the times to avoid thundering herds) */ + V(TestingClientBootstrapConsensusAuthorityDownloadSchedule, CSV_INTERVAL, + "10, 11, 3600, 10800, 25200, 54000, 111600, 262800" /* 3 days + 1 hour */), + V(TestingClientBootstrapConsensusFallbackDownloadSchedule, CSV_INTERVAL, + "0, 1, 4, 11, 3600, 10800, 25200, 54000, 111600, 262800"), + /* When clients only have authorities available, they use this schedule: */ + V(TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule, CSV_INTERVAL, + "0, 3, 7, 3600, 10800, 25200, 54000, 111600, 262800"), + /* We don't want to overwhelm slow networks (or mirrors whose replies are + * blocked), but we also don't want to fail if only some mirrors are + * blackholed. Clients will try 3 directories simultaneously. + * (Relays never use simultaneous connections.) */ + V(TestingClientBootstrapConsensusMaxInProgressTries, UINT, "3"), V(TestingBridgeDownloadSchedule, CSV_INTERVAL, "3600, 900, 900, 3600"), V(TestingClientMaxIntervalWithoutRequest, INTERVAL, "10 minutes"), V(TestingDirConnectionMaxStall, INTERVAL, "5 minutes"), V(TestingConsensusMaxDownloadTries, UINT, "8"), + /* Since we try connections rapidly and simultaneously, we can afford + * to give up earlier. (This protects against overloading directories.) */ + V(TestingClientBootstrapConsensusMaxDownloadTries, UINT, "7"), + /* We want to give up much earlier if we're only using authorities. 
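Side note (not part of the patch): each schedule entry is the delay from the previous attempt to the next one, so the figures quoted in the comment above ("3 authorities over 10 seconds", "2 authorities and 4 fallbacks over 21 seconds") are just running sums of the leading entries. A minimal standalone C sketch, with the schedule values copied from the defaults above and all helper names purely illustrative:

#include <stdio.h>

// Print the cumulative attempt times implied by a delay schedule: the n-th
// attempt happens at the running sum of the first n delays.
static void
print_attempt_times(const char *name, const int *delays, int n)
{
  long t = 0;
  printf("%s:", name);
  for (int i = 0; i < n; i++) {
    t += delays[i];
    printf(" %lds", t);
  }
  printf("\n");
}

int
main(void)
{
  // Leading entries of the default bootstrap schedules above; the long
  // tail of multi-hour delays is omitted.
  const int authority[] = { 10, 11 };
  const int fallback[] = { 0, 1, 4, 11 };
  const int authority_only[] = { 0, 3, 7 };

  print_attempt_times("authority (with fallbacks)", authority, 2); // 10s 21s
  print_attempt_times("fallback", fallback, 4);              // 0s 1s 5s 16s
  print_attempt_times("authority-only", authority_only, 3);  // 0s 3s 10s
  return 0;
}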
*/ + V(TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries, UINT, "4"), V(TestingDescriptorMaxDownloadTries, UINT, "8"), V(TestingMicrodescMaxDownloadTries, UINT, "8"), V(TestingCertMaxDownloadTries, UINT, "8"), @@ -525,10 +555,18 @@ static const config_var_t testing_tor_network_defaults[] = { "15, 20, 30, 60"), V(TestingClientConsensusDownloadSchedule, CSV_INTERVAL, "0, 0, 5, 10, " "15, 20, 30, 60"), + V(TestingClientBootstrapConsensusAuthorityDownloadSchedule, CSV_INTERVAL, + "0, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 16, 32, 60"), + V(TestingClientBootstrapConsensusFallbackDownloadSchedule, CSV_INTERVAL, + "0, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 16, 32, 60"), + V(TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule, CSV_INTERVAL, + "0, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 16, 32, 60"), V(TestingBridgeDownloadSchedule, CSV_INTERVAL, "60, 30, 30, 60"), V(TestingClientMaxIntervalWithoutRequest, INTERVAL, "5 seconds"), V(TestingDirConnectionMaxStall, INTERVAL, "30 seconds"), V(TestingConsensusMaxDownloadTries, UINT, "80"), + V(TestingClientBootstrapConsensusMaxDownloadTries, UINT, "80"), + V(TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries, UINT, "80"), V(TestingDescriptorMaxDownloadTries, UINT, "80"), V(TestingMicrodescMaxDownloadTries, UINT, "80"), V(TestingCertMaxDownloadTries, UINT, "80"), @@ -3749,10 +3787,16 @@ options_validate(or_options_t *old_options, or_options_t *options, CHECK_DEFAULT(TestingClientDownloadSchedule); CHECK_DEFAULT(TestingServerConsensusDownloadSchedule); CHECK_DEFAULT(TestingClientConsensusDownloadSchedule); + CHECK_DEFAULT(TestingClientBootstrapConsensusAuthorityDownloadSchedule); + CHECK_DEFAULT(TestingClientBootstrapConsensusFallbackDownloadSchedule); + CHECK_DEFAULT(TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule); CHECK_DEFAULT(TestingBridgeDownloadSchedule); CHECK_DEFAULT(TestingClientMaxIntervalWithoutRequest); CHECK_DEFAULT(TestingDirConnectionMaxStall); CHECK_DEFAULT(TestingConsensusMaxDownloadTries); + CHECK_DEFAULT(TestingClientBootstrapConsensusMaxDownloadTries); + CHECK_DEFAULT(TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries); + CHECK_DEFAULT(TestingClientBootstrapConsensusMaxInProgressTries); CHECK_DEFAULT(TestingDescriptorMaxDownloadTries); CHECK_DEFAULT(TestingMicrodescMaxDownloadTries); CHECK_DEFAULT(TestingCertMaxDownloadTries); @@ -3827,11 +3871,41 @@ options_validate(or_options_t *old_options, or_options_t *options, } if (options->TestingConsensusMaxDownloadTries < 2) { - REJECT("TestingConsensusMaxDownloadTries must be greater than 1."); + REJECT("TestingConsensusMaxDownloadTries must be greater than 2."); } else if (options->TestingConsensusMaxDownloadTries > 800) { COMPLAIN("TestingConsensusMaxDownloadTries is insanely high."); } + if (options->TestingClientBootstrapConsensusMaxDownloadTries < 2) { + REJECT("TestingClientBootstrapConsensusMaxDownloadTries must be greater " + "than 2." + ); + } else if (options->TestingClientBootstrapConsensusMaxDownloadTries > 800) { + COMPLAIN("TestingClientBootstrapConsensusMaxDownloadTries is insanely " + "high."); + } + + if (options->TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries + < 2) { + REJECT("TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries must " + "be greater than 2." 
+ ); + } else if ( + options->TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries + > 800) { + COMPLAIN("TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries is " + "insanely high."); + } + + if (options->TestingClientBootstrapConsensusMaxInProgressTries < 1) { + REJECT("TestingClientBootstrapConsensusMaxInProgressTries must be greater " + "than 0."); + } else if (options->TestingClientBootstrapConsensusMaxInProgressTries + > 100) { + COMPLAIN("TestingClientBootstrapConsensusMaxInProgressTries is insanely " + "high."); + } + if (options->TestingDescriptorMaxDownloadTries < 2) { REJECT("TestingDescriptorMaxDownloadTries must be greater than 1."); } else if (options->TestingDescriptorMaxDownloadTries > 800) { diff --git a/src/or/directory.c b/src/or/directory.c index 555462b325..0d2a8b2459 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -3443,26 +3443,54 @@ connection_dir_finished_connecting(dir_connection_t *conn) } /** Decide which download schedule we want to use based on descriptor type - * in dls and whether we are acting as directory server, and - * then return a list of int pointers defining download delays in seconds. - * Helper function for download_status_increment_failure() and - * download_status_reset(). */ + * in dls and options. + * Then return a list of int pointers defining download delays in seconds. + * Helper function for download_status_increment_failure(), + * download_status_reset(), and download_status_increment_attempt(). */ static const smartlist_t * -find_dl_schedule_and_len(download_status_t *dls, int server) +find_dl_schedule(download_status_t *dls, const or_options_t *options) { + /* XX/teor Replace with dir_server_mode from #12538 */ + const int dir_server = options->DirPort_set; + const int multi_d = networkstatus_consensus_can_use_multiple_directories( + options); + const int we_are_bootstrapping = networkstatus_consensus_is_boostrapping( + time(NULL)); + const int use_fallbacks = networkstatus_consensus_can_use_extra_fallbacks( + options); switch (dls->schedule) { case DL_SCHED_GENERIC: - if (server) - return get_options()->TestingServerDownloadSchedule; - else - return get_options()->TestingClientDownloadSchedule; + if (dir_server) { + return options->TestingServerDownloadSchedule; + } else { + return options->TestingClientDownloadSchedule; + } case DL_SCHED_CONSENSUS: - if (server) - return get_options()->TestingServerConsensusDownloadSchedule; - else - return get_options()->TestingClientConsensusDownloadSchedule; + if (!multi_d) { + return options->TestingServerConsensusDownloadSchedule; + } else { + if (we_are_bootstrapping) { + if (!use_fallbacks) { + /* A bootstrapping client without extra fallback directories */ + return + options->TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule; + } else if (dls->want_authority) { + /* A bootstrapping client with extra fallback directories, but + * connecting to an authority */ + return + options->TestingClientBootstrapConsensusAuthorityDownloadSchedule; + } else { + /* A bootstrapping client connecting to extra fallback directories + */ + return + options->TestingClientBootstrapConsensusFallbackDownloadSchedule; + } + } else { + return options->TestingClientConsensusDownloadSchedule; + } + } case DL_SCHED_BRIDGE: - return get_options()->TestingBridgeDownloadSchedule; + return options->TestingBridgeDownloadSchedule; default: tor_assert(0); } @@ -3471,54 +3499,168 @@ find_dl_schedule_and_len(download_status_t *dls, int server) return NULL; } -/** Called when an attempt to 
download dls has failed with HTTP status +/* Find the current delay for dls based on schedule. + * Set dls->next_attempt_at based on now, and return the delay. + * Helper for download_status_increment_failure and + * download_status_increment_attempt. */ +STATIC int +download_status_schedule_get_delay(download_status_t *dls, + const smartlist_t *schedule, + time_t now) +{ + tor_assert(dls); + tor_assert(schedule); + + int delay = INT_MAX; + uint8_t dls_schedule_position = (dls->increment_on + == DL_SCHED_INCREMENT_ATTEMPT + ? dls->n_download_attempts + : dls->n_download_failures); + + if (dls_schedule_position < smartlist_len(schedule)) + delay = *(int *)smartlist_get(schedule, dls_schedule_position); + else if (dls_schedule_position == IMPOSSIBLE_TO_DOWNLOAD) + delay = INT_MAX; + else + delay = *(int *)smartlist_get(schedule, smartlist_len(schedule) - 1); + + /* A negative delay makes no sense. Knowing that delay is + * non-negative allows us to safely do the wrapping check below. */ + tor_assert(delay >= 0); + + /* Avoid now+delay overflowing INT_MAX, by comparing with a subtraction + * that won't overflow (since delay is non-negative). */ + if (delay < INT_MAX && now <= INT_MAX - delay) { + dls->next_attempt_at = now+delay; + } else { + dls->next_attempt_at = TIME_MAX; + } + + return delay; +} + +/* Log a debug message about item, which increments on increment_action, has + * incremented dls_n_download_increments times. The message varies based on + * was_schedule_incremented (if not, not_incremented_response is logged), and + * the values of increment, dls_next_attempt_at, and now. + * Helper for download_status_increment_failure and + * download_status_increment_attempt. */ +static void +download_status_log_helper(const char *item, int was_schedule_incremented, + const char *increment_action, + const char *not_incremented_response, + uint8_t dls_n_download_increments, int increment, + time_t dls_next_attempt_at, time_t now) +{ + if (item) { + if (!was_schedule_incremented) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again %s.", + item, increment_action, (int)dls_n_download_increments, + not_incremented_response); + else if (increment == 0) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again immediately.", + item, increment_action, (int)dls_n_download_increments); + else if (dls_next_attempt_at < TIME_MAX) + log_debug(LD_DIR, "%s %s %d time(s); I'll try again in %d seconds.", + item, increment_action, (int)dls_n_download_increments, + (int)(dls_next_attempt_at-now)); + else + log_debug(LD_DIR, "%s %s %d time(s); Giving up for a while.", + item, increment_action, (int)dls_n_download_increments); + } +} + +/** Determine when a failed download attempt should be retried. + * Called when an attempt to download dls has failed with HTTP status * status_code. Increment the failure count (if the code indicates a - * real failure) and set dls-\>next_attempt_at to an appropriate time - * in the future. */ + * real failure, or if we're a server) and set dls-\>next_attempt_at to + * an appropriate time in the future and return it. + * If dls->increment_on is DL_SCHED_INCREMENT_ATTEMPT, increment the + * failure count, and return a time in the far future for the next attempt (to + * avoid an immediate retry). 
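Aside, not part of the patch: the overflow guard in download_status_schedule_get_delay() above is the usual "compare with a subtraction that cannot overflow" idiom. A self-contained sketch of the same idea follows; the function name and the single cap parameter are illustrative only (the patch itself compares against INT_MAX and clamps the stored time to TIME_MAX):

#include <limits.h>
#include <stdio.h>
#include <time.h>

// Add a non-negative delay to a timestamp without risking overflow: instead
// of computing now + delay and then checking the result, check
// "now <= cap - delay" first.  The subtraction cannot overflow because
// delay is non-negative and no larger than cap.
time_t
add_delay_clamped(time_t now, int delay, time_t cap)
{
  if (delay < 0)
    return cap;                     // defensive: schedule delays are >= 0
  if ((time_t)delay <= cap && now <= cap - delay)
    return now + delay;
  return cap;                       // clamp to "never"
}

int
main(void)
{
  time_t now = time(NULL);
  // A small delay is added normally; a huge one clamps instead of wrapping.
  printf("%ld\n", (long)add_delay_clamped(now, 60, (time_t)INT_MAX));
  printf("%ld\n", (long)add_delay_clamped(now, INT_MAX, (time_t)INT_MAX));
  return 0;
}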
*/ time_t download_status_increment_failure(download_status_t *dls, int status_code, const char *item, int server, time_t now) { - const smartlist_t *schedule; - int increment; + int increment = -1; tor_assert(dls); + + /* only count the failure if it's permanent, or we're a server */ if (status_code != 503 || server) { if (dls->n_download_failures < IMPOSSIBLE_TO_DOWNLOAD-1) ++dls->n_download_failures; } - schedule = find_dl_schedule_and_len(dls, server); + if (dls->increment_on == DL_SCHED_INCREMENT_FAILURE) { + /* We don't find out that a failure-based schedule has attempted a + * connection until that connection fails. + * We'll never find out about successful connections, but this doesn't + * matter, because schedules are reset after a successful download. + */ + if (dls->n_download_attempts < IMPOSSIBLE_TO_DOWNLOAD-1) + ++dls->n_download_attempts; - if (dls->n_download_failures < smartlist_len(schedule)) - increment = *(int *)smartlist_get(schedule, dls->n_download_failures); - else if (dls->n_download_failures == IMPOSSIBLE_TO_DOWNLOAD) - increment = INT_MAX; - else - increment = *(int *)smartlist_get(schedule, smartlist_len(schedule) - 1); - - if (increment < INT_MAX) - dls->next_attempt_at = now+increment; - else - dls->next_attempt_at = TIME_MAX; - - if (item) { - if (increment == 0) - log_debug(LD_DIR, "%s failed %d time(s); I'll try again immediately.", - item, (int)dls->n_download_failures); - else if (dls->next_attempt_at < TIME_MAX) - log_debug(LD_DIR, "%s failed %d time(s); I'll try again in %d seconds.", - item, (int)dls->n_download_failures, - (int)(dls->next_attempt_at-now)); - else - log_debug(LD_DIR, "%s failed %d time(s); Giving up for a while.", - item, (int)dls->n_download_failures); + /* only return a failure retry time if this schedule increments on failures + */ + const smartlist_t *schedule = find_dl_schedule(dls, get_options()); + increment = download_status_schedule_get_delay(dls, schedule, now); } + + download_status_log_helper(item, !dls->increment_on, "failed", + "concurrently", dls->n_download_failures, + increment, dls->next_attempt_at, now); + + if (dls->increment_on == DL_SCHED_INCREMENT_ATTEMPT) { + /* stop this schedule retrying on failure, it will launch concurrent + * connections instead */ + return TIME_MAX; + } else { + return dls->next_attempt_at; + } +} + +/** Determine when the next download attempt should be made when using an + * attempt-based (potentially concurrent) download schedule. + * Called when an attempt to download dls is being initiated. + * Increment the attempt count and set dls-\>next_attempt_at to an + * appropriate time in the future and return it. + * If dls->increment_on is DL_SCHED_INCREMENT_FAILURE, don't increment + * the attempts, and return a time in the far future (to avoid launching a + * concurrent attempt). 
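To make the difference between the two increment modes concrete, here is a toy model (purely illustrative names, not Tor code): a failure-based schedule only moves forward when a connection fails, while an attempt-based schedule moves forward every time a connection is launched, which is what lets several bootstrap connections be in flight at once.

#include <stdio.h>

enum toy_increment_on { ON_FAILURE, ON_ATTEMPT };

struct toy_dls {
  int position;                 // index into the delay schedule
  enum toy_increment_on mode;
};

static const int toy_schedule[] = { 0, 1, 4, 11 };
#define TOY_SCHED_LEN 4

// Delay that currently applies, clamped to the last schedule entry.
static int
toy_next_delay(const struct toy_dls *dls)
{
  int i = dls->position < TOY_SCHED_LEN ? dls->position : TOY_SCHED_LEN - 1;
  return toy_schedule[i];
}

static void
toy_launch(struct toy_dls *dls)
{
  if (dls->mode == ON_ATTEMPT)
    dls->position++;            // advance as soon as we try
}

static void
toy_fail(struct toy_dls *dls)
{
  if (dls->mode == ON_FAILURE)
    dls->position++;            // advance only when a try fails
}

int
main(void)
{
  struct toy_dls bootstrap = { 0, ON_ATTEMPT };
  struct toy_dls regular = { 0, ON_FAILURE };

  // Three bootstrap launches back to back: each launch advances the
  // schedule, so the delays 0, 1, 4 apply even though nothing has failed.
  for (int i = 0; i < 3; i++) {
    printf("bootstrap launch %d after a delay of %ds\n",
           i + 1, toy_next_delay(&bootstrap));
    toy_launch(&bootstrap);
  }

  // The failure-based schedule only advances when a connection fails.
  printf("regular delay before any failure: %ds\n", toy_next_delay(&regular));
  toy_fail(&regular);
  printf("regular delay after one failure:  %ds\n", toy_next_delay(&regular));
  return 0;
}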
*/ +time_t +download_status_increment_attempt(download_status_t *dls, const char *item, + time_t now) +{ + int delay = -1; + tor_assert(dls); + + if (dls->increment_on == DL_SCHED_INCREMENT_FAILURE) { + /* this schedule should retry on failure, and not launch any concurrent + attempts */ + log_info(LD_BUG, "Tried to launch an attempt-based connection on a " + "failure-based schedule."); + return TIME_MAX; + } + + if (dls->n_download_attempts < IMPOSSIBLE_TO_DOWNLOAD-1) + ++dls->n_download_attempts; + + const smartlist_t *schedule = find_dl_schedule(dls, get_options()); + delay = download_status_schedule_get_delay(dls, schedule, now); + + download_status_log_helper(item, dls->increment_on, "attempted", + "on failure", dls->n_download_attempts, + delay, dls->next_attempt_at, now); + return dls->next_attempt_at; } /** Reset dls so that it will be considered downloadable * immediately, and/or to show that we don't need it anymore. * + * Must be called to initialise a download schedule, otherwise the zeroth item + * in the schedule will never be used. + * * (We find the zeroth element of the download schedule, and set * next_attempt_at to be the appropriate offset from 'now'. In most * cases this means setting it to 'now', so the item will be immediately @@ -3527,14 +3669,16 @@ download_status_increment_failure(download_status_t *dls, int status_code, void download_status_reset(download_status_t *dls) { - if (dls->n_download_failures == IMPOSSIBLE_TO_DOWNLOAD) + if (dls->n_download_failures == IMPOSSIBLE_TO_DOWNLOAD + || dls->n_download_attempts == IMPOSSIBLE_TO_DOWNLOAD) return; /* Don't reset this. */ - const smartlist_t *schedule = find_dl_schedule_and_len( - dls, get_options()->DirPort_set); + const smartlist_t *schedule = find_dl_schedule(dls, get_options()); dls->n_download_failures = 0; + dls->n_download_attempts = 0; dls->next_attempt_at = time(NULL) + *(int *)smartlist_get(schedule, 0); + /* Don't reset dls->want_authority or dls->increment_on */ } /** Return the number of failures on dls since the last success (if @@ -3545,6 +3689,22 @@ download_status_get_n_failures(const download_status_t *dls) return dls->n_download_failures; } +/** Return the number of attempts to download dls since the last success + * (if any). This can differ from download_status_get_n_failures() due to + * outstanding concurrent attempts. */ +int +download_status_get_n_attempts(const download_status_t *dls) +{ + return dls->n_download_attempts; +} + +/** Return the next time to attempt to download dls. */ +time_t +download_status_get_next_attempt_at(const download_status_t *dls) +{ + return dls->next_attempt_at; +} + /** Called when one or more routerdesc (or extrainfo, if was_extrainfo) * fetches have failed (with uppercase fingerprints listed in failed, * either as descriptor digests or as identity digests based on diff --git a/src/or/directory.h b/src/or/directory.h index bdcc1a23c6..4255868d3a 100644 --- a/src/or/directory.h +++ b/src/or/directory.h @@ -92,6 +92,8 @@ int router_supports_extrainfo(const char *identity_digest, int is_authority); time_t download_status_increment_failure(download_status_t *dls, int status_code, const char *item, int server, time_t now); +time_t download_status_increment_attempt(download_status_t *dls, + const char *item, time_t now); /** Increment the failure count of the download_status_t dls, with * the optional status code sc. 
*/ #define download_status_failed(dls, sc) \ @@ -107,8 +109,9 @@ static INLINE int download_status_is_ready(download_status_t *dls, time_t now, int max_failures) { - return (dls->n_download_failures <= max_failures - && dls->next_attempt_at <= now); + int under_failure_limit = (dls->n_download_failures <= max_failures + && dls->n_download_attempts <= max_failures); + return (under_failure_limit && dls->next_attempt_at <= now); } static void download_status_mark_impossible(download_status_t *dl); @@ -117,9 +120,12 @@ static INLINE void download_status_mark_impossible(download_status_t *dl) { dl->n_download_failures = IMPOSSIBLE_TO_DOWNLOAD; + dl->n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD; } int download_status_get_n_failures(const download_status_t *dls); +int download_status_get_n_attempts(const download_status_t *dls); +time_t download_status_get_next_attempt_at(const download_status_t *dls); #ifdef TOR_UNIT_TESTS /* Used only by directory.c and test_dir.c */ @@ -133,6 +139,9 @@ STATIC int directory_handle_command_get(dir_connection_t *conn, const char *headers, const char *req_body, size_t req_body_len); +STATIC int download_status_schedule_get_delay(download_status_t *dls, + const smartlist_t *schedule, + time_t now); #endif #endif diff --git a/src/or/main.c b/src/or/main.c index 527e2b1ffe..60957bd6ab 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -1876,18 +1876,29 @@ check_for_reachability_bw_callback(time_t now, const or_options_t *options) static int fetch_networkstatus_callback(time_t now, const or_options_t *options) { - /* 2c. Every minute (or every second if TestingTorNetwork), check - * whether we want to download any networkstatus documents. */ + /* 2c. Every minute (or every second if TestingTorNetwork, or during + * client bootstrap), check whether we want to download any networkstatus + * documents. */ /* How often do we check whether we should download network status * documents? */ -#define networkstatus_dl_check_interval(o) ((o)->TestingTorNetwork ? 1 : 60) + const int we_are_bootstrapping = networkstatus_consensus_is_boostrapping( + now); + const int prefer_mirrors = !directory_fetches_from_authorities( + get_options()); + int networkstatus_dl_check_interval = 60; + /* check more often when testing, or when bootstrapping from mirrors + * (connection limits prevent too many connections being made) */ + if (options->TestingTorNetwork + || (we_are_bootstrapping && prefer_mirrors)) { + networkstatus_dl_check_interval = 1; + } if (should_delay_dir_fetches(options, NULL)) return PERIODIC_EVENT_NO_UPDATE; update_networkstatus_downloads(now); - return networkstatus_dl_check_interval(options); + return networkstatus_dl_check_interval; } static int diff --git a/src/or/networkstatus.c b/src/or/networkstatus.c index 71a2c0f121..1d5b2f2723 100644 --- a/src/or/networkstatus.c +++ b/src/or/networkstatus.c @@ -85,8 +85,30 @@ static time_t time_to_download_next_consensus[N_CONSENSUS_FLAVORS]; /** Download status for the current consensus networkstatus. 
*/ static download_status_t consensus_dl_status[N_CONSENSUS_FLAVORS] = { - { 0, 0, DL_SCHED_CONSENSUS }, - { 0, 0, DL_SCHED_CONSENSUS }, + { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_FAILURE }, + { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_FAILURE }, + }; + +#define N_CONSENSUS_BOOTSTRAP_SCHEDULES 2 +#define CONSENSUS_BOOTSTRAP_SOURCE_AUTHORITY 0 +#define CONSENSUS_BOOTSTRAP_SOURCE_ANY_DIRSERVER 1 + +/* Using DL_SCHED_INCREMENT_ATTEMPT on these schedules means that + * download_status_increment_failure won't increment these entries. + * However, any bootstrap connection failures that occur after we have + * a valid consensus will count against the failure counts on the non-bootstrap + * schedules. There should only be one of these, as all the others will have + * been cancelled. (This doesn't seem to be a significant issue.) */ +static download_status_t + consensus_bootstrap_dl_status[N_CONSENSUS_BOOTSTRAP_SCHEDULES] = + { + { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_AUTHORITY, + DL_SCHED_INCREMENT_ATTEMPT }, + /* During bootstrap, DL_WANT_ANY_DIRSERVER means "use fallbacks". */ + { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_ATTEMPT }, }; /** True iff we have logged a warning about this OR's version being older than @@ -97,6 +119,10 @@ static int have_warned_about_old_version = 0; static int have_warned_about_new_version = 0; static void routerstatus_list_update_named_server_map(void); +static void update_consensus_bootstrap_multiple_downloads( + time_t now, + const or_options_t *options, + int we_are_bootstrapping); /** Forget that we've warned about anything networkstatus-related, so we will * give fresh warnings if the same behavior happens again. */ @@ -122,6 +148,9 @@ networkstatus_reset_download_failures(void) for (i=0; i < N_CONSENSUS_FLAVORS; ++i) download_status_reset(&consensus_dl_status[i]); + + for (i=0; i < N_CONSENSUS_BOOTSTRAP_SCHEDULES; ++i) + download_status_reset(&consensus_bootstrap_dl_status[i]); } /** Read every cached v3 consensus networkstatus from the disk. */ @@ -734,6 +763,55 @@ we_want_to_fetch_flavor(const or_options_t *options, int flavor) * fetching certs before we check whether there is a better one? */ #define DELAY_WHILE_FETCHING_CERTS (20*60) +/* Check if a downloaded consensus flavor should still wait for certificates + * to download now. + * If so, return 1. If not, fail dls and return 0. */ +static int +check_consensus_waiting_for_certs(int flavor, time_t now, + download_status_t *dls) +{ + consensus_waiting_for_certs_t *waiting; + + /* We should always have a known flavor, because we_want_to_fetch_flavor() + * filters out unknown flavors. */ + tor_assert(flavor >= 0 && flavor < N_CONSENSUS_FLAVORS); + + waiting = &consensus_waiting_for_certs[flavor]; + if (waiting->consensus) { + /* XXXX make sure this doesn't delay sane downloads. */ + if (waiting->set_at + DELAY_WHILE_FETCHING_CERTS > now) { + return 1; + } else { + if (!waiting->dl_failed) { + download_status_failed(dls, 0); + waiting->dl_failed=1; + } + } + } + + return 0; +} + +/* Return the maximum download tries for a consensus, based on options and + * whether we_are_bootstrapping. 
*/ +static int +consensus_max_download_tries(const or_options_t *options, + int we_are_bootstrapping) +{ + int use_fallbacks = networkstatus_consensus_can_use_extra_fallbacks(options); + + if (we_are_bootstrapping) { + if (use_fallbacks) { + return options->TestingClientBootstrapConsensusMaxDownloadTries; + } else { + return + options->TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries; + } + } + + return options->TestingConsensusMaxDownloadTries; +} + /** If we want to download a fresh consensus, launch a new download as * appropriate. */ static void @@ -741,12 +819,19 @@ update_consensus_networkstatus_downloads(time_t now) { int i; const or_options_t *options = get_options(); + const int we_are_bootstrapping = networkstatus_consensus_is_boostrapping( + now); + const int use_multi_conn = + networkstatus_consensus_can_use_multiple_directories(options); + + if (should_delay_dir_fetches(options, NULL)) + return; for (i=0; i < N_CONSENSUS_FLAVORS; ++i) { /* XXXX need some way to download unknown flavors if we are caching. */ const char *resource; - consensus_waiting_for_certs_t *waiting; networkstatus_t *c; + int max_in_progress_conns = 1; if (! we_want_to_fetch_flavor(options, i)) continue; @@ -762,35 +847,166 @@ update_consensus_networkstatus_downloads(time_t now) resource = networkstatus_get_flavor_name(i); - /* Let's make sure we remembered to update consensus_dl_status */ - tor_assert(consensus_dl_status[i].schedule == DL_SCHED_CONSENSUS); - - if (!download_status_is_ready(&consensus_dl_status[i], now, - options->TestingConsensusMaxDownloadTries)) - continue; /* We failed downloading a consensus too recently. */ - if (connection_dir_get_by_purpose_and_resource( - DIR_PURPOSE_FETCH_CONSENSUS, resource)) - continue; /* There's an in-progress download.*/ - - waiting = &consensus_waiting_for_certs[i]; - if (waiting->consensus) { - /* XXXX make sure this doesn't delay sane downloads. */ - if (waiting->set_at + DELAY_WHILE_FETCHING_CERTS > now) { - continue; /* We're still getting certs for this one. */ - } else { - if (!waiting->dl_failed) { - download_status_failed(&consensus_dl_status[i], 0); - waiting->dl_failed=1; - } - } + /* Check if we already have enough connections in progress */ + if (we_are_bootstrapping) { + max_in_progress_conns = + options->TestingClientBootstrapConsensusMaxInProgressTries; + } + if (connection_dir_count_by_purpose_and_resource( + DIR_PURPOSE_FETCH_CONSENSUS, + resource) + >= max_in_progress_conns) { + continue; } - log_info(LD_DIR, "Launching %s networkstatus consensus download.", - networkstatus_get_flavor_name(i)); + /* Check if we want to launch another download for a usable consensus. + * Only used during bootstrap. 
*/ + if (we_are_bootstrapping && use_multi_conn + && i == usable_consensus_flavor()) { + + /* Check if we're already downloading a usable consensus */ + int consens_conn_count = + connection_dir_count_by_purpose_and_resource( + DIR_PURPOSE_FETCH_CONSENSUS, + resource); + int connect_consens_conn_count = + connection_dir_count_by_purpose_resource_and_state( + DIR_PURPOSE_FETCH_CONSENSUS, + resource, + DIR_CONN_STATE_CONNECTING); + + if (i == usable_consensus_flavor() + && connect_consens_conn_count < consens_conn_count) { + continue; + } + + /* Make multiple connections for a bootstrap consensus download */ + update_consensus_bootstrap_multiple_downloads(now, options, + we_are_bootstrapping); + } else { + /* Check if we failed downloading a consensus too recently */ + int max_dl_tries = consensus_max_download_tries(options, + we_are_bootstrapping); + + /* Let's make sure we remembered to update consensus_dl_status */ + tor_assert(consensus_dl_status[i].schedule == DL_SCHED_CONSENSUS); + + if (!download_status_is_ready(&consensus_dl_status[i], + now, + max_dl_tries)) { + continue; + } + + /* Check if we're waiting for certificates to download */ + if (check_consensus_waiting_for_certs(i, now, &consensus_dl_status[i])) + continue; + + /* Try the requested attempt */ + log_info(LD_DIR, "Launching %s standard networkstatus consensus " + "download.", networkstatus_get_flavor_name(i)); + directory_get_from_dirserver(DIR_PURPOSE_FETCH_CONSENSUS, + ROUTER_PURPOSE_GENERAL, resource, + PDS_RETRY_IF_NO_SERVERS, + consensus_dl_status[i].want_authority); + } + } +} + +/** When we're bootstrapping, launch one or more consensus download + * connections, if schedule indicates connection(s) should be made after now. + * If is_authority, connect to an authority, otherwise, use a fallback + * directory mirror. + */ +static void +update_consensus_bootstrap_attempt_downloads( + time_t now, + const or_options_t *options, + int we_are_bootstrapping, + download_status_t *dls, + download_want_authority_t want_authority) +{ + int max_dl_tries = consensus_max_download_tries(options, + we_are_bootstrapping); + const char *resource = networkstatus_get_flavor_name( + usable_consensus_flavor()); + + /* Let's make sure we remembered to update schedule */ + tor_assert(dls->schedule == DL_SCHED_CONSENSUS); + + /* Allow for multiple connections in the same second, if the schedule value + * is 0. */ + while (download_status_is_ready(dls, now, max_dl_tries)) { + log_info(LD_DIR, "Launching %s bootstrap %s networkstatus consensus " + "download.", resource, (want_authority == DL_WANT_AUTHORITY + ? "authority" + : "mirror")); directory_get_from_dirserver(DIR_PURPOSE_FETCH_CONSENSUS, ROUTER_PURPOSE_GENERAL, resource, - PDS_RETRY_IF_NO_SERVERS); + PDS_RETRY_IF_NO_SERVERS, want_authority); + /* schedule the next attempt */ + download_status_increment_attempt(dls, resource, now); + } +} + +/** If we're bootstrapping, check the connection schedules and see if we want + * to make additional, potentially concurrent, consensus download + * connections. + * Only call when bootstrapping, and when we want to make additional + * connections. Only nodes that satisfy + * networkstatus_consensus_can_use_multiple_directories make additonal + * connections. 
+ */ +static void +update_consensus_bootstrap_multiple_downloads(time_t now, + const or_options_t *options, + int we_are_bootstrapping) +{ + const int usable_flavor = usable_consensus_flavor(); + + /* make sure we can use multiple connections */ + if (!networkstatus_consensus_can_use_multiple_directories(options)) { + return; + } + + /* If we've managed to validate a usable consensus, don't make additonal + * connections. */ + if (!we_are_bootstrapping) { + return; + } + + /* Launch concurrent consensus download attempt(s) based on the mirror and + * authority schedules. Try the mirror first - this makes it slightly more + * likely that we'll connect to the fallback first, and then end the + * authority connection attempt. */ + + /* If a consensus download fails because it's waiting for certificates, + * we'll fail both the authority and fallback schedules. This is better than + * failing only one of the schedules, and having the other continue + * unchecked. + */ + + /* If we don't have or can't use extra fallbacks, don't try them. */ + if (networkstatus_consensus_can_use_extra_fallbacks(options)) { + download_status_t *dls_f = + &consensus_bootstrap_dl_status[CONSENSUS_BOOTSTRAP_SOURCE_ANY_DIRSERVER]; + + if (!check_consensus_waiting_for_certs(usable_flavor, now, dls_f)) { + /* During bootstrap, DL_WANT_ANY_DIRSERVER means "use fallbacks". */ + update_consensus_bootstrap_attempt_downloads(now, options, + we_are_bootstrapping, dls_f, + DL_WANT_ANY_DIRSERVER); + } + } + + /* Now try an authority. */ + download_status_t *dls_a = + &consensus_bootstrap_dl_status[CONSENSUS_BOOTSTRAP_SOURCE_AUTHORITY]; + + if (!check_consensus_waiting_for_certs(usable_flavor, now, dls_a)) { + update_consensus_bootstrap_attempt_downloads(now, options, + we_are_bootstrapping, dls_a, + DL_WANT_AUTHORITY); } } @@ -1057,6 +1273,66 @@ networkstatus_get_reasonably_live_consensus(time_t now, int flavor) return NULL; } +/** Check if we're bootstrapping a consensus download. This means that we are + * only using the authorities and fallback directory mirrors to download the + * consensus flavour we'll use. */ +int +networkstatus_consensus_is_boostrapping(time_t now) +{ + /* If we don't have a consensus, we must still be bootstrapping */ + return !networkstatus_get_reasonably_live_consensus( + now, + usable_consensus_flavor()); +} + +/** Check if we can use multiple directories for a consensus download. + * Only clients (including bridges, but excluding bridge clients) benefit + * from multiple simultaneous consensus downloads. */ +int +networkstatus_consensus_can_use_multiple_directories( + const or_options_t *options) +{ + /* If we are a client, bridge, bridge client, or hidden service */ + return (!directory_fetches_from_authorities(options)); +} + +/** Check if we can use fallback directory mirrors for a consensus download. + * Only clients that have a list of additional fallbacks can use fallbacks. */ +int +networkstatus_consensus_can_use_extra_fallbacks(const or_options_t *options) +{ + /* If we are a client, and we have additional mirrors, we can use them. + * The list length comparisons are a quick way to check if we have any + * non-authority fallback directories. If we ever have any authorities that + * aren't fallback directories, we will need to change this code. */ + return (!directory_fetches_from_authorities(options) + && (smartlist_len(router_get_fallback_dir_servers()) + > smartlist_len(router_get_trusted_dir_servers()))); +} + +/* Is tor currently downloading a consensus of the usable flavor? 
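Reader's summary (not part of the patch): the predicates above, together with find_dl_schedule() in directory.c, decide which consensus download schedule a node ends up on. A standalone sketch of that decision; the function and parameter names here are illustrative only:

#include <stdio.h>

// Which consensus download schedule does find_dl_schedule() end up picking?
// The inputs mirror directory_fetches_from_authorities(),
// networkstatus_consensus_is_boostrapping(),
// networkstatus_consensus_can_use_extra_fallbacks(), and dls->want_authority.
static const char *
pick_consensus_schedule(int fetches_from_authorities,
                        int bootstrapping,
                        int have_extra_fallbacks,
                        int want_authority)
{
  if (fetches_from_authorities)   // directory servers and similar nodes
    return "TestingServerConsensusDownloadSchedule";
  if (!bootstrapping)             // we already have a usable consensus
    return "TestingClientConsensusDownloadSchedule";
  if (!have_extra_fallbacks)
    return "TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule";
  if (want_authority)
    return "TestingClientBootstrapConsensusAuthorityDownloadSchedule";
  return "TestingClientBootstrapConsensusFallbackDownloadSchedule";
}

int
main(void)
{
  // A bootstrapping client with fallbacks, contacting a mirror:
  puts(pick_consensus_schedule(0, 1, 1, 0));
  // The same client once it has a reasonably live consensus:
  puts(pick_consensus_schedule(0, 0, 1, 0));
  return 0;
}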
*/ +int +networkstatus_consensus_is_downloading_usable_flavor(void) +{ + const char *usable_resource = networkstatus_get_flavor_name( + usable_consensus_flavor()); + const int consens_conn_usable_count = + connection_dir_count_by_purpose_and_resource( + DIR_PURPOSE_FETCH_CONSENSUS, + usable_resource); + + const int connect_consens_conn_usable_count = + connection_dir_count_by_purpose_resource_and_state( + DIR_PURPOSE_FETCH_CONSENSUS, + usable_resource, + DIR_CONN_STATE_CONNECTING); + if (connect_consens_conn_usable_count < consens_conn_usable_count) { + return 1; + } + + return 0; +} + /** Given two router status entries for the same router identity, return 1 if * if the contents have changed between them. Otherwise, return 0. */ static int diff --git a/src/or/networkstatus.h b/src/or/networkstatus.h index d6e9e37013..d44022c80c 100644 --- a/src/or/networkstatus.h +++ b/src/or/networkstatus.h @@ -70,6 +70,13 @@ MOCK_DECL(networkstatus_t *,networkstatus_get_latest_consensus_by_flavor, networkstatus_t *networkstatus_get_live_consensus(time_t now); networkstatus_t *networkstatus_get_reasonably_live_consensus(time_t now, int flavor); +int networkstatus_consensus_is_boostrapping(time_t now); +int networkstatus_consensus_can_use_multiple_directories( + const or_options_t *options); +int networkstatus_consensus_can_use_extra_fallbacks( + const or_options_t *options); +int networkstatus_consensus_is_downloading_usable_flavor(void); + #define NSSET_FROM_CACHE 1 #define NSSET_WAS_WAITING_FOR_CERTS 2 #define NSSET_DONT_DOWNLOAD_CERTS 4 diff --git a/src/or/or.h b/src/or/or.h index c5596e3a58..850a6f9390 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1946,8 +1946,8 @@ typedef enum { } saved_location_t; #define saved_location_bitfield_t ENUM_BF(saved_location_t) -/** Enumeration: what kind of download schedule are we using for a given - * object? */ +/** Enumeration: what directory object is being downloaded? + * This determines which schedule is selected to perform the download. */ typedef enum { DL_SCHED_GENERIC = 0, DL_SCHED_CONSENSUS = 1, @@ -1955,24 +1955,74 @@ typedef enum { } download_schedule_t; #define download_schedule_bitfield_t ENUM_BF(download_schedule_t) -/** Enumeration: do we want to try an authority or a fallback directory - * mirror for our download? */ +/** Enumeration: is the download schedule for downloading from an authority, + * or from any available directory mirror? + * During bootstrap, "any" means a fallback (or an authority, if there + * are no fallbacks). + * When we have a valid consensus, "any" means any directory server. */ typedef enum { - DL_WANT_FALLBACK = 0, + DL_WANT_ANY_DIRSERVER = 0, DL_WANT_AUTHORITY = 1, } download_want_authority_t; #define download_want_authority_bitfield_t \ ENUM_BF(download_want_authority_t) -/** Information about our plans for retrying downloads for a downloadable - * object. */ -typedef struct download_status_t { - time_t next_attempt_at; /**< When should we try downloading this descriptor - * again? */ - uint8_t n_download_failures; /**< Number of failures trying to download the - * most recent descriptor. */ - download_schedule_bitfield_t schedule : 8; +/** Enumeration: do we want to increment the schedule position each time a + * connection is attempted (these attempts can be concurrent), or do we want + * to increment the schedule position after a connection fails? 
*/ +typedef enum { + DL_SCHED_INCREMENT_FAILURE = 0, + DL_SCHED_INCREMENT_ATTEMPT = 1, +} download_schedule_increment_t; +#define download_schedule_increment_bitfield_t \ + ENUM_BF(download_schedule_increment_t) +/** Information about our plans for retrying downloads for a downloadable + * directory object. + * Each type of downloadable directory object has a corresponding retry + * schedule, which can be different depending on whether the object is + * being downloaded from an authority or a mirror (want_authority). + * next_attempt_at contains the next time we will attempt to download + * the object. + * For schedules that increment_on failure, n_download_failures + * is used to determine the position in the schedule. (Each schedule is a + * smartlist of integer delays, parsed from a CSV option.) Every time a + * connection attempt fails, n_download_failures is incremented, + * the new delay value is looked up from the schedule, and + * next_attempt_at is set delay seconds from the time the previous + * connection failed. Therefore, at most one failure-based connection can be + * in progress for each download_status_t. + * For schedules that increment_on attempt, n_download_attempts + * is used to determine the position in the schedule. Every time a + * connection attempt is made, n_download_attempts is incremented, + * the new delay value is looked up from the schedule, and + * next_attempt_at is set delay seconds from the time the previous + * connection was attempted. Therefore, multiple concurrent attempted-based + * connections can be in progress for each download_status_t. + * After an object is successfully downloaded, any other concurrent connections + * are terminated. A new schedule which starts at position 0 is used for + * subsequent downloads of the same object. + */ +typedef struct download_status_t { + time_t next_attempt_at; /**< When should we try downloading this object + * again? */ + uint8_t n_download_failures; /**< Number of failed downloads of the most + * recent object, since the last success. */ + uint8_t n_download_attempts; /**< Number of (potentially concurrent) attempts + * to download the most recent object, since + * the last success. */ + download_schedule_bitfield_t schedule : 8; /**< What kind of object is being + * downloaded? This determines the + * schedule used for the download. + */ + download_want_authority_bitfield_t want_authority : 1; /**< Is the download + * happening from an authority + * or a mirror? This determines + * the schedule used for the + * download. */ + download_schedule_increment_bitfield_t increment_on : 1; /**< does this + * schedule increment on each attempt, + * or after each failure? */ } download_status_t; /** If n_download_failures is this high, the download can never happen. */ @@ -4078,6 +4128,36 @@ typedef struct { * on testing networks. */ smartlist_t *TestingClientConsensusDownloadSchedule; + /** Schedule for when clients should download consensuses from authorities + * if they are bootstrapping (that is, they don't have a usable, reasonably + * live consensus). Only used by clients fetching from a list of fallback + * directory mirrors. + * + * This schedule is incremented by (potentially concurrent) connection + * attempts, unlike other schedules, which are incremented by connection + * failures. Only altered on testing networks. 
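For clarity only (not part of the patch): the positional initializers used for consensus_bootstrap_dl_status in networkstatus.c earlier in this diff map onto the download_status_t fields documented above as shown below. This fragment uses C99 designated initializers and a made-up variable name, and it assumes the declarations from or.h rather than compiling on its own:

// Equivalent to { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_AUTHORITY,
//                 DL_SCHED_INCREMENT_ATTEMPT } in networkstatus.c.
static download_status_t bootstrap_authority_dls = {
  .next_attempt_at = 0,                  // download_status_reset() sets the
                                         // real first attempt time
  .n_download_failures = 0,
  .n_download_attempts = 0,
  .schedule = DL_SCHED_CONSENSUS,        // pick a consensus schedule
  .want_authority = DL_WANT_AUTHORITY,   // bootstrap from an authority
  .increment_on = DL_SCHED_INCREMENT_ATTEMPT, // advance on every attempt
};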
*/ + smartlist_t *TestingClientBootstrapConsensusAuthorityDownloadSchedule; + + /** Schedule for when clients should download consensuses from fallback + * directory mirrors if they are bootstrapping (that is, they don't have a + * usable, reasonably live consensus). Only used by clients fetching from a + * list of fallback directory mirrors. + * + * This schedule is incremented by (potentially concurrent) connection + * attempts, unlike other schedules, which are incremented by connection + * failures. Only altered on testing networks. */ + smartlist_t *TestingClientBootstrapConsensusFallbackDownloadSchedule; + + /** Schedule for when clients should download consensuses from authorities + * if they are bootstrapping (that is, they don't have a usable, reasonably + * live consensus). Only used by clients which don't have or won't fetch + * from a list of fallback directory mirrors. + * + * This schedule is incremented by (potentially concurrent) connection + * attempts, unlike other schedules, which are incremented by connection + * failures. Only altered on testing networks. */ + smartlist_t *TestingClientBootstrapConsensusAuthorityOnlyDownloadSchedule; + /** Schedule for when clients should download bridge descriptors. Only * altered on testing networks. */ smartlist_t *TestingBridgeDownloadSchedule; @@ -4095,6 +4175,21 @@ typedef struct { * up? Only altered on testing networks. */ int TestingConsensusMaxDownloadTries; + /** How many times will a client try to fetch a consensus while + * bootstrapping using a list of fallback directories, before it gives up? + * Only altered on testing networks. */ + int TestingClientBootstrapConsensusMaxDownloadTries; + + /** How many times will a client try to fetch a consensus while + * bootstrapping using only a list of authorities, before it gives up? + * Only altered on testing networks. */ + int TestingClientBootstrapConsensusAuthorityOnlyMaxDownloadTries; + + /** How many simultaneous in-progress connections will we make when trying + * to fetch a consensus before we wait for one to complete, timeout, or + * error out? Only altered on testing networks. */ + int TestingClientBootstrapConsensusMaxInProgressTries; + /** How many times will we try to download a router's descriptor before * giving up? Only altered on testing networks. */ int TestingDescriptorMaxDownloadTries; diff --git a/src/or/routerlist.c b/src/or/routerlist.c index ca5105807e..0027a04cb5 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -900,7 +900,7 @@ authority_certs_fetch_missing(networkstatus_t *status, time_t now) /* XXX - do we want certs from authorities or mirrors? - teor */ directory_get_from_dirserver(DIR_PURPOSE_FETCH_CERTIFICATE, 0, resource, PDS_RETRY_IF_NO_SERVERS, - DL_WANT_FALLBACK); + DL_WANT_ANY_DIRSERVER); tor_free(resource); } /* else we didn't add any: they were all pending */ @@ -946,7 +946,7 @@ authority_certs_fetch_missing(networkstatus_t *status, time_t now) /* XXX - do we want certs from authorities or mirrors? - teor */ directory_get_from_dirserver(DIR_PURPOSE_FETCH_CERTIFICATE, 0, resource, PDS_RETRY_IF_NO_SERVERS, - DL_WANT_FALLBACK); + DL_WANT_ANY_DIRSERVER); tor_free(resource); } /* else they were all pending */ @@ -4380,14 +4380,14 @@ MOCK_IMPL(STATIC void, initiate_descriptor_downloads, tor_free(cp); if (source) { - /* We know which authority we want. */ + /* We know which authority or directory mirror we want. 
*/ directory_initiate_command_routerstatus(source, purpose, ROUTER_PURPOSE_GENERAL, DIRIND_ONEHOP, resource, NULL, 0, 0); } else { directory_get_from_dirserver(purpose, ROUTER_PURPOSE_GENERAL, resource, - pds_flags, DL_WANT_FALLBACK); + pds_flags, DL_WANT_ANY_DIRSERVER); } tor_free(resource); } @@ -4669,9 +4669,14 @@ launch_dummy_descriptor_download_as_needed(time_t now, last_descriptor_download_attempted + DUMMY_DOWNLOAD_INTERVAL < now && last_dummy_download + DUMMY_DOWNLOAD_INTERVAL < now) { last_dummy_download = now; + /* XX/teor - do we want an authority here, because they are less likely + * to give us the wrong address? (See #17782) + * I'm leaving the previous behaviour intact, because I don't like + * the idea of some relays contacting an authority every 20 minutes. */ directory_get_from_dirserver(DIR_PURPOSE_FETCH_SERVERDESC, ROUTER_PURPOSE_GENERAL, "authority.z", - PDS_RETRY_IF_NO_SERVERS, DL_WANT_FALLBACK); + PDS_RETRY_IF_NO_SERVERS, + DL_WANT_ANY_DIRSERVER); } } diff --git a/src/test/test_dir.c b/src/test/test_dir.c index 855746e749..ce639b644f 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -3494,6 +3494,435 @@ test_dir_packages(void *arg) tor_free(res); } +static void +test_dir_download_status_schedule(void *arg) +{ + (void)arg; + download_status_t dls_failure = { 0, 0, 0, DL_SCHED_GENERIC, + DL_WANT_AUTHORITY, + DL_SCHED_INCREMENT_FAILURE }; + download_status_t dls_attempt = { 0, 0, 0, DL_SCHED_CONSENSUS, + DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_ATTEMPT}; + download_status_t dls_bridge = { 0, 0, 0, DL_SCHED_BRIDGE, + DL_WANT_AUTHORITY, + DL_SCHED_INCREMENT_FAILURE}; + int increment = -1; + int expected_increment = -1; + time_t current_time = time(NULL); + int delay1 = -1; + int delay2 = -1; + smartlist_t *schedule = smartlist_new(); + + /* Make a dummy schedule */ + smartlist_add(schedule, (void *)&delay1); + smartlist_add(schedule, (void *)&delay2); + + /* check a range of values */ + delay1 = 1000; + increment = download_status_schedule_get_delay(&dls_failure, + schedule, + TIME_MIN); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_failure.next_attempt_at == TIME_MIN + expected_increment); + +#if TIME_T_IS_SIGNED + delay1 = INT_MAX; + increment = download_status_schedule_get_delay(&dls_failure, + schedule, + -1); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_failure.next_attempt_at == TIME_MAX); +#endif + + delay1 = 0; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + 0); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == 0 + expected_increment); + + delay1 = 1000; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + 1); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == 1 + expected_increment); + + delay1 = INT_MAX; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + current_time); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == TIME_MAX); + + delay1 = 1; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + TIME_MAX); + expected_increment = delay1; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == TIME_MAX); + + /* see what happens when we reach the end */ + dls_attempt.n_download_attempts++; + 
dls_bridge.n_download_failures++; + + delay2 = 100; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + current_time); + expected_increment = delay2; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == current_time + delay2); + + delay2 = 1; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + current_time); + expected_increment = delay2; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == current_time + delay2); + + /* see what happens when we try to go off the end */ + dls_attempt.n_download_attempts++; + dls_bridge.n_download_failures++; + + delay2 = 5; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + current_time); + expected_increment = delay2; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == current_time + delay2); + + delay2 = 17; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + current_time); + expected_increment = delay2; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == current_time + delay2); + + /* see what happens when we reach IMPOSSIBLE_TO_DOWNLOAD */ + dls_attempt.n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD; + dls_bridge.n_download_failures = IMPOSSIBLE_TO_DOWNLOAD; + + delay2 = 35; + increment = download_status_schedule_get_delay(&dls_attempt, + schedule, + current_time); + expected_increment = INT_MAX; + tt_assert(increment == expected_increment); + tt_assert(dls_attempt.next_attempt_at == TIME_MAX); + + delay2 = 99; + increment = download_status_schedule_get_delay(&dls_bridge, + schedule, + current_time); + expected_increment = INT_MAX; + tt_assert(increment == expected_increment); + tt_assert(dls_bridge.next_attempt_at == TIME_MAX); + + done: + /* the pointers in schedule are allocated on the stack */ + smartlist_free(schedule); +} + +static void +test_dir_download_status_increment(void *arg) +{ + (void)arg; + download_status_t dls_failure = { 0, 0, 0, DL_SCHED_GENERIC, + DL_WANT_AUTHORITY, + DL_SCHED_INCREMENT_FAILURE }; + download_status_t dls_attempt = { 0, 0, 0, DL_SCHED_BRIDGE, + DL_WANT_ANY_DIRSERVER, + DL_SCHED_INCREMENT_ATTEMPT}; + int delay0 = -1; + int delay1 = -1; + int delay2 = -1; + smartlist_t *schedule = smartlist_new(); + or_options_t test_options; + time_t next_at = TIME_MAX; + time_t current_time = time(NULL); + + /* Provide some values for the schedule */ + delay0 = 10; + delay1 = 99; + delay2 = 20; + + /* Make the schedule */ + smartlist_add(schedule, (void *)&delay0); + smartlist_add(schedule, (void *)&delay1); + smartlist_add(schedule, (void *)&delay2); + + /* Put it in the options */ + mock_options = &test_options; + reset_options(mock_options, &mock_get_options_calls); + mock_options->TestingClientDownloadSchedule = schedule; + mock_options->TestingBridgeDownloadSchedule = schedule; + + MOCK(get_options, mock_get_options); + + /* Check that a failure reset works */ + mock_get_options_calls = 0; + download_status_reset(&dls_failure); + /* we really want to test that it's equal to time(NULL) + delay0, but that's + * an unrealiable test, because time(NULL) might change. 
+  tt_assert(download_status_get_next_attempt_at(&dls_failure)
+            >= current_time + delay0);
+  tt_assert(download_status_get_next_attempt_at(&dls_failure)
+            != TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_failure) == 0);
+  tt_assert(download_status_get_n_attempts(&dls_failure) == 0);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* avoid timing inconsistencies */
+  dls_failure.next_attempt_at = current_time + delay0;
+
+  /* check that a reset schedule becomes ready at the right time */
+  tt_assert(download_status_is_ready(&dls_failure,
+                                     current_time + delay0 - 1,
+                                     1) == 0);
+  tt_assert(download_status_is_ready(&dls_failure,
+                                     current_time + delay0,
+                                     1) == 1);
+  tt_assert(download_status_is_ready(&dls_failure,
+                                     current_time + delay0 + 1,
+                                     1) == 1);
+
+  /* Check that a failure increment works */
+  mock_get_options_calls = 0;
+  next_at = download_status_increment_failure(&dls_failure, 404, "test", 0,
+                                              current_time);
+  tt_assert(next_at == current_time + delay1);
+  tt_assert(download_status_get_n_failures(&dls_failure) == 1);
+  tt_assert(download_status_get_n_attempts(&dls_failure) == 1);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* check that an incremented schedule becomes ready at the right time */
+  tt_assert(download_status_is_ready(&dls_failure,
+                                     current_time + delay1 - 1,
+                                     1) == 0);
+  tt_assert(download_status_is_ready(&dls_failure,
+                                     current_time + delay1,
+                                     1) == 1);
+  tt_assert(download_status_is_ready(&dls_failure,
+                                     current_time + delay1 + 1,
+                                     1) == 1);
+
+  /* check that a schedule isn't ready if it's had too many failures */
+  tt_assert(download_status_is_ready(&dls_failure,
+                                     current_time + delay1 + 10,
+                                     0) == 0);
+
+  /* Check that failure increments don't happen on 503 for clients, but that
+   * attempt increments do. */
+  mock_get_options_calls = 0;
+  next_at = download_status_increment_failure(&dls_failure, 503, "test", 0,
+                                              current_time);
+  tt_assert(next_at == current_time + delay1);
+  tt_assert(download_status_get_n_failures(&dls_failure) == 1);
+  tt_assert(download_status_get_n_attempts(&dls_failure) == 2);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* Check that failure increments do happen on 503 for servers */
+  mock_get_options_calls = 0;
+  next_at = download_status_increment_failure(&dls_failure, 503, "test", 1,
+                                              current_time);
+  tt_assert(next_at == current_time + delay2);
+  tt_assert(download_status_get_n_failures(&dls_failure) == 2);
+  tt_assert(download_status_get_n_attempts(&dls_failure) == 3);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* Check what happens when we run off the end of the schedule */
+  mock_get_options_calls = 0;
+  next_at = download_status_increment_failure(&dls_failure, 404, "test", 0,
+                                              current_time);
+  tt_assert(next_at == current_time + delay2);
+  tt_assert(download_status_get_n_failures(&dls_failure) == 3);
+  tt_assert(download_status_get_n_attempts(&dls_failure) == 4);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* Check what happens when we hit the failure limit */
+  mock_get_options_calls = 0;
+  download_status_mark_impossible(&dls_failure);
+  next_at = download_status_increment_failure(&dls_failure, 404, "test", 0,
+                                              current_time);
+  tt_assert(next_at == TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_failure)
+            == IMPOSSIBLE_TO_DOWNLOAD);
+  tt_assert(download_status_get_n_attempts(&dls_failure)
+            == IMPOSSIBLE_TO_DOWNLOAD);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* Check that a failure reset doesn't reset at the limit */
+  mock_get_options_calls = 0;
+  download_status_reset(&dls_failure);
+  tt_assert(download_status_get_next_attempt_at(&dls_failure)
+            == TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_failure)
+            == IMPOSSIBLE_TO_DOWNLOAD);
+  tt_assert(download_status_get_n_attempts(&dls_failure)
+            == IMPOSSIBLE_TO_DOWNLOAD);
+  tt_assert(mock_get_options_calls == 0);
+
+  /* Check that a failure reset resets just before the limit */
+  mock_get_options_calls = 0;
+  dls_failure.n_download_failures = IMPOSSIBLE_TO_DOWNLOAD - 1;
+  dls_failure.n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD - 1;
+  download_status_reset(&dls_failure);
+  /* we really want to test that it's equal to time(NULL) + delay0, but that's
+   * an unreliable test, because time(NULL) might change. */
+  tt_assert(download_status_get_next_attempt_at(&dls_failure)
+            >= current_time + delay0);
+  tt_assert(download_status_get_next_attempt_at(&dls_failure)
+            != TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_failure) == 0);
+  tt_assert(download_status_get_n_attempts(&dls_failure) == 0);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* Check that failure increments do happen on attempt-based schedules,
+   * but that the retry is set at the end of time */
+  mock_get_options_calls = 0;
+  next_at = download_status_increment_failure(&dls_attempt, 404, "test", 0,
+                                              current_time);
+  tt_assert(next_at == TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_attempt) == 1);
+  tt_assert(download_status_get_n_attempts(&dls_attempt) == 0);
+  tt_assert(mock_get_options_calls == 0);
+
+  /* Check that an attempt reset works */
+  mock_get_options_calls = 0;
+  download_status_reset(&dls_attempt);
+  /* we really want to test that it's equal to time(NULL) + delay0, but that's
+   * an unreliable test, because time(NULL) might change. */
+  tt_assert(download_status_get_next_attempt_at(&dls_attempt)
+            >= current_time + delay0);
+  tt_assert(download_status_get_next_attempt_at(&dls_attempt)
+            != TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_attempt) == 0);
+  tt_assert(download_status_get_n_attempts(&dls_attempt) == 0);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* avoid timing inconsistencies */
+  dls_attempt.next_attempt_at = current_time + delay0;
+
+  /* check that a reset schedule becomes ready at the right time */
+  tt_assert(download_status_is_ready(&dls_attempt,
+                                     current_time + delay0 - 1,
+                                     1) == 0);
+  tt_assert(download_status_is_ready(&dls_attempt,
+                                     current_time + delay0,
+                                     1) == 1);
+  tt_assert(download_status_is_ready(&dls_attempt,
+                                     current_time + delay0 + 1,
+                                     1) == 1);
+
+  /* Check that an attempt increment works */
+  mock_get_options_calls = 0;
+  next_at = download_status_increment_attempt(&dls_attempt, "test",
+                                              current_time);
+  tt_assert(next_at == current_time + delay1);
+  tt_assert(download_status_get_n_failures(&dls_attempt) == 0);
+  tt_assert(download_status_get_n_attempts(&dls_attempt) == 1);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* check that an incremented schedule becomes ready at the right time */
+  tt_assert(download_status_is_ready(&dls_attempt,
+                                     current_time + delay1 - 1,
+                                     1) == 0);
+  tt_assert(download_status_is_ready(&dls_attempt,
+                                     current_time + delay1,
+                                     1) == 1);
+  tt_assert(download_status_is_ready(&dls_attempt,
+                                     current_time + delay1 + 1,
+                                     1) == 1);
+
+  /* check that a schedule isn't ready if it's had too many attempts */
+  tt_assert(download_status_is_ready(&dls_attempt,
+                                     current_time + delay1 + 10,
+                                     0) == 0);
+
+  /* Check what happens when we reach, then run off, the end of the
+   * schedule */
+  mock_get_options_calls = 0;
+  next_at = download_status_increment_attempt(&dls_attempt, "test",
+                                              current_time);
+  tt_assert(next_at == current_time + delay2);
+  tt_assert(download_status_get_n_failures(&dls_attempt) == 0);
+  tt_assert(download_status_get_n_attempts(&dls_attempt) == 2);
+  tt_assert(mock_get_options_calls >= 1);
+
+  mock_get_options_calls = 0;
+  next_at = download_status_increment_attempt(&dls_attempt, "test",
+                                              current_time);
+  tt_assert(next_at == current_time + delay2);
+  tt_assert(download_status_get_n_failures(&dls_attempt) == 0);
+  tt_assert(download_status_get_n_attempts(&dls_attempt) == 3);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* Check what happens when we hit the attempt limit */
+  mock_get_options_calls = 0;
+  download_status_mark_impossible(&dls_attempt);
+  next_at = download_status_increment_attempt(&dls_attempt, "test",
+                                              current_time);
+  tt_assert(next_at == TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_attempt)
+            == IMPOSSIBLE_TO_DOWNLOAD);
+  tt_assert(download_status_get_n_attempts(&dls_attempt)
+            == IMPOSSIBLE_TO_DOWNLOAD);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* Check that an attempt reset doesn't reset at the limit */
+  mock_get_options_calls = 0;
+  download_status_reset(&dls_attempt);
+  tt_assert(download_status_get_next_attempt_at(&dls_attempt)
+            == TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_attempt)
+            == IMPOSSIBLE_TO_DOWNLOAD);
+  tt_assert(download_status_get_n_attempts(&dls_attempt)
+            == IMPOSSIBLE_TO_DOWNLOAD);
+  tt_assert(mock_get_options_calls == 0);
+
+  /* Check that an attempt reset resets just before the limit */
+  mock_get_options_calls = 0;
+  dls_attempt.n_download_failures = IMPOSSIBLE_TO_DOWNLOAD - 1;
+  dls_attempt.n_download_attempts = IMPOSSIBLE_TO_DOWNLOAD - 1;
+  download_status_reset(&dls_attempt);
+  /* we really want to test that it's equal to time(NULL) + delay0, but that's
+   * an unreliable test, because time(NULL) might change. */
+  tt_assert(download_status_get_next_attempt_at(&dls_attempt)
+            >= current_time + delay0);
+  tt_assert(download_status_get_next_attempt_at(&dls_attempt)
+            != TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_attempt) == 0);
+  tt_assert(download_status_get_n_attempts(&dls_attempt) == 0);
+  tt_assert(mock_get_options_calls >= 1);
+
+  /* Check that attempt increments don't happen on failure-based schedules,
+   * and that the attempt is set at the end of time */
+  mock_get_options_calls = 0;
+  next_at = download_status_increment_attempt(&dls_failure, "test",
+                                              current_time);
+  tt_assert(next_at == TIME_MAX);
+  tt_assert(download_status_get_n_failures(&dls_failure) == 0);
+  tt_assert(download_status_get_n_attempts(&dls_failure) == 0);
+  tt_assert(mock_get_options_calls == 0);
+
+ done:
+  /* the pointers in schedule are allocated on the stack */
+  smartlist_free(schedule);
+  UNMOCK(get_options);
+  mock_options = NULL;
+  mock_get_options_calls = 0;
+}
+
 #define DIR_LEGACY(name) \
   { #name, test_dir_ ## name , TT_FORK, NULL, NULL }
 
@@ -3525,6 +3954,8 @@ struct testcase_t dir_tests[] = {
   DIR(purpose_needs_anonymity, 0),
   DIR(fetch_type, 0),
   DIR(packages, 0),
+  DIR(download_status_schedule, 0),
+  DIR(download_status_increment, 0),
   END_OF_TESTCASES
 };
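
Illustrative sketch (not part of the patch): the tests above distinguish attempt-based
schedules (DL_SCHED_INCREMENT_ATTEMPT), which advance whenever a connection is launched,
from failure-based ones (DL_SCHED_INCREMENT_FAILURE), which advance only on failures.
The fragment below shows how a caller might drive an attempt-based download_status_t,
using only the functions exercised by these tests. It assumes Tor's internal headers
are in scope; the function name, the "consensus" label, and the limit of 7 tries are
placeholders, not values taken from the patch.

  /* Sketch only: drive an attempt-based download status the way the
   * bootstrap consensus code is expected to. */
  static void
  bootstrap_consensus_download_sketch(void)
  {
    download_status_t dls = { 0, 0, 0, DL_SCHED_CONSENSUS,
                              DL_WANT_ANY_DIRSERVER,
                              DL_SCHED_INCREMENT_ATTEMPT };
    time_t now = time(NULL);

    if (download_status_is_ready(&dls, now, 7)) {
      /* Launching a (possibly concurrent) connection counts as an attempt
       * and advances next_attempt_at along the schedule. */
      download_status_increment_attempt(&dls, "consensus", now);
    }

    /* A hard failure is still counted on an attempt-based schedule, but
     * (per the tests) the failure path returns TIME_MAX rather than
     * scheduling the retry; retry timing stays with the attempt above. */
    if (download_status_increment_failure(&dls, 404, "consensus", 0, now)
        == TIME_MAX) {
      /* nothing further to schedule here */
    }
  }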