Merge commit 'origin/maint-0.2.1'

This commit is contained in:
Nick Mathewson 2009-06-30 10:13:51 -04:00
commit 9f28cfe86a
7 changed files with 56 additions and 39 deletions

View File

@ -40,9 +40,21 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
Changes in version 0.2.1.17-?? - 2009-??-??
o Major bugfixes:
- Directory authorities were neglecting to mark relays down in their
internal histories if the relays fell off the routerlist without
ever being found unreachable. So there were relays in the histories
that hadn't been seen for eight months, and were listed as being
up for eight months. This wreaked havoc on the "median wfu"
and "median mtbf" calculations, in turn making Guard and Stable
flags very wrong, hurting network performance. Fixes bugs 696 and
969. Bugfix on 0.2.0.6-alpha.
o Minor bugfixes:
- Serve the DirPortFrontPage page even when we have been approaching
our quotas recently. Fixes bug 1013; bugfix on 0.2.1.8-alpha.
- Do not cap bandwidths reported by directory authorities; they are
already adjusted to reflect reality.
o Major features:
- Clients now use the bandwidth values in the consensus, rather than

View File

@ -797,7 +797,7 @@ directory_remove_invalid(void)
if (r & FP_REJECT) {
log_info(LD_DIRSERV, "Router '%s' is now rejected: %s",
ent->nickname, msg?msg:"");
routerlist_remove(rl, ent, 0);
routerlist_remove(rl, ent, 0, time(NULL));
i--;
changed = 1;
continue;
@ -951,8 +951,8 @@ dirserv_set_router_is_running(routerinfo_t *router, time_t now)
answer = get_options()->AssumeReachable ||
now < router->last_reachable + REACHABLE_TIMEOUT;
if (router->is_running && !answer) {
/* it was running but now it's not. tell rephist. */
if (!answer) {
/* not considered reachable. tell rephist. */
rep_hist_note_router_unreachable(router->cache_info.identity_digest, now);
}

View File

@ -1792,7 +1792,7 @@ evdns_server_request_format_response(struct server_request *req, int err)
if (j > 512) {
overflow:
j = 512;
buf[3] |= 0x02; /* set the truncated bit. */
buf[2] |= 0x02; /* set the truncated bit. */
}
req->response_len = (size_t)j;

View File

@ -925,7 +925,7 @@ run_scheduled_events(time_t now)
time_to_downrate_stability = rep_hist_downrate_old_runs(now);
if (authdir_mode_tests_reachability(options)) {
if (time_to_save_stability < now) {
if (time_to_save_stability && rep_hist_record_mtbf_data()<0) {
if (time_to_save_stability && rep_hist_record_mtbf_data(now, 1)<0) {
log_warn(LD_GENERAL, "Couldn't store mtbf data.");
}
#define SAVE_STABILITY_INTERVAL (30*60)
@ -1970,14 +1970,15 @@ tor_cleanup(void)
/* Remove our pid file. We don't care if there was an error when we
* unlink, nothing we could do about it anyways. */
if (options->command == CMD_RUN_TOR) {
time_t now = time(NULL);
if (options->PidFile)
unlink(options->PidFile);
if (accounting_is_enabled(options))
accounting_record_bandwidth_usage(time(NULL), get_or_state());
accounting_record_bandwidth_usage(now, get_or_state());
or_state_mark_dirty(get_or_state(), 0); /* force an immediate save. */
or_state_save(time(NULL));
or_state_save(now);
if (authdir_mode_tests_reachability(options))
rep_hist_record_mtbf_data();
rep_hist_record_mtbf_data(now, 0);
}
#ifdef USE_DMALLOC
dmalloc_log_stats();

View File

@ -3969,7 +3969,7 @@ void rep_history_clean(time_t before);
void rep_hist_note_router_reachable(const char *id, time_t when);
void rep_hist_note_router_unreachable(const char *id, time_t when);
int rep_hist_record_mtbf_data(void);
int rep_hist_record_mtbf_data(time_t now, int missing_means_down);
int rep_hist_load_mtbf_data(time_t now);
time_t rep_hist_downrate_old_runs(time_t now);
@ -4405,7 +4405,8 @@ void routerinfo_free(routerinfo_t *router);
void extrainfo_free(extrainfo_t *extrainfo);
void routerlist_free(routerlist_t *rl);
void dump_routerlist_mem_usage(int severity);
void routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old);
void routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old,
time_t now);
void routerlist_free_all(void);
void routerlist_reset_warnings(void);
void router_set_status(const char *digest, int up);

View File

@ -683,9 +683,13 @@ rep_history_clean(time_t before)
}
}
/** Write MTBF data to disk. Returns 0 on success, negative on failure. */
/** Write MTBF data to disk. Return 0 on success, negative on failure.
*
* If <b>missing_means_down</b>, then if we're about to write an entry
* that is still considered up but isn't in our routerlist, consider it
* to be down. */
int
rep_hist_record_mtbf_data(void)
rep_hist_record_mtbf_data(time_t now, int missing_means_down)
{
char time_buf[ISO_TIME_LEN+1];
@ -745,6 +749,18 @@ rep_hist_record_mtbf_data(void)
hist = (or_history_t*) or_history_p;
base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
if (missing_means_down && hist->start_of_run &&
!router_get_by_digest(digest)) {
/* We think this relay is running, but it's not listed in our
* routerlist. Somehow it fell out without telling us it went
* down. Complain and also correct it. */
log_info(LD_HIST,
"Relay '%s' is listed as up in rephist, but it's not in "
"our routerlist. Correcting.", dbuf);
rep_hist_note_router_unreachable(digest, now);
}
PRINTF((f, "R %s\n", dbuf));
if (hist->start_of_run > 0) {
format_iso_time(time_buf, hist->start_of_run);

View File

@ -1523,15 +1523,12 @@ router_get_advertised_bandwidth_capped(routerinfo_t *router)
return result;
}
/** Eventually, the number we return will come from the directory
* consensus, so clients can dynamically update to better numbers.
*
* But for now, or in case there is no consensus available, just return
* a sufficient default. */
static uint32_t
get_max_believable_bandwidth(void)
/** Return bw*1000, unless bw*1000 would overflow, in which case return
* INT32_MAX. */
static INLINE int32_t
kb_to_bytes(uint32_t bw)
{
return DEFAULT_MAX_BELIEVABLE_BANDWIDTH;
return (bw > (INT32_MAX/1000)) ? INT32_MAX : bw*1000;
}
/** Helper function:
@ -1568,7 +1565,6 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
int n_unknown = 0;
bitarray_t *exit_bits;
bitarray_t *guard_bits;
uint32_t max_believable_bw = get_max_believable_bandwidth();
int me_idx = -1;
/* Can't choose exit and guard at same time */
@ -1598,7 +1594,7 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
is_exit = status->is_exit;
is_guard = status->is_possible_guard;
if (status->has_bandwidth) {
this_bw = status->bandwidth*1000;
this_bw = kb_to_bytes(status->bandwidth);
} else { /* guess */
/* XXX022 once consensuses always list bandwidths, we can take
* this guessing business out. -RD */
@ -1617,7 +1613,7 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
is_exit = router->is_exit;
is_guard = router->is_possible_guard;
if (rs && rs->has_bandwidth) {
this_bw = rs->bandwidth*1000;
this_bw = kb_to_bytes(rs->bandwidth);
} else if (rs) { /* guess; don't trust the descriptor */
/* XXX022 once consensuses always list bandwidths, we can take
* this guessing business out. -RD */
@ -1626,27 +1622,15 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
flags |= is_exit ? 2 : 0;
flags |= is_guard ? 4 : 0;
} else /* bridge or other descriptor not in our consensus */
this_bw = router_get_advertised_bandwidth(router);
this_bw = router_get_advertised_bandwidth_capped(router);
}
if (is_exit)
bitarray_set(exit_bits, i);
if (is_guard)
bitarray_set(guard_bits, i);
/* if they claim something huge, don't believe it */
if (this_bw > max_believable_bw) {
char fp[HEX_DIGEST_LEN+1];
base16_encode(fp, sizeof(fp), statuses ?
status->identity_digest :
router->cache_info.identity_digest,
DIGEST_LEN);
log_fn(LOG_PROTOCOL_WARN, LD_DIR,
"Bandwidth %d for router %s (%s) exceeds allowed max %d, capping",
this_bw, router ? router->nickname : "(null)",
fp, max_believable_bw);
this_bw = max_believable_bw;
}
if (is_known) {
bandwidths[i] = (int32_t) this_bw; // safe since MAX_BELIEVABLE<INT32_MAX
tor_assert(bandwidths[i] >= 0);
if (is_guard)
total_guard_bw += this_bw;
else
@ -2645,7 +2629,7 @@ routerlist_insert_old(routerlist_t *rl, routerinfo_t *ri)
* If <b>make_old</b> is true, instead of deleting the router, we try adding
* it to rl-&gt;old_routers. */
void
routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old)
routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old, time_t now)
{
routerinfo_t *ri_tmp;
extrainfo_t *ei_tmp;
@ -2653,6 +2637,9 @@ routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old)
tor_assert(0 <= idx && idx < smartlist_len(rl->routers));
tor_assert(smartlist_get(rl->routers, idx) == ri);
/* make sure the rephist module knows that it's not running */
rep_hist_note_router_unreachable(ri->cache_info.identity_digest, now);
ri->cache_info.routerlist_index = -1;
smartlist_del(rl->routers, idx);
if (idx < smartlist_len(rl->routers)) {
@ -3344,7 +3331,7 @@ routerlist_remove_old_routers(void)
log_info(LD_DIR,
"Forgetting obsolete (too old) routerinfo for router '%s'",
router->nickname);
routerlist_remove(routerlist, router, 1);
routerlist_remove(routerlist, router, 1, now);
i--;
}
}