r16573@catbus: nickm | 2007-11-08 11:57:16 -0500

Mess with the formula for the Guard flag again.  Now it requires that you be in the most familiar 7/8 of nodes, and have above median wfu for that 7/8th.  See spec for details.  Also, log thresholds better.


svn:r12440
This commit is contained in:
Nick Mathewson 2007-11-08 16:58:59 +00:00
parent 46273c9080
commit e0b9c893bc
8 changed files with 108 additions and 24 deletions

View File

@ -241,6 +241,10 @@ Changes in version 0.2.0.9-alpha - 2007-10-24
should exist before trying to replace the current one.
- Use a more forgiving schedule for retrying failed consensus
downloads than for other types.
- Adjust the guard selection formula that authorities use a little more:
require guards to be in the top 7/8 in terms of how long we have
known about them, and above the median of those nodes in terms of
weighted fractional uptime.
o Minor bugfixes (other directory issues):
- Correct the implementation of "download votes by digest." Bugfix on

View File

@ -23,7 +23,7 @@ Items blocking 0.2.0.10-alpha:
after we've picked it". We should write a real proposal for this --
in 0.2.1.x.
- Delay the separation of flags till 0.2.1.x. -NM
- Let's come up with a good formula for Guard.
o Let's come up with a good formula for Guard.
Here's a go:

View File

@ -972,11 +972,11 @@ $Id$
"Running" -- A router is 'Running' if the authority managed to connect to
it successfully within the last 30 minutes.
"Stable" -- A router is 'Stable' if it is active, and either its
Weighted MTBF is at least the median for known active routers or
its Weighted MTBF is at least 10 days. Routers are never called Stable if
they are running a version of Tor known to drop circuits stupidly.
(0.1.1.10-alpha through 0.1.1.16-rc are stupid this way.)
"Stable" -- A router is 'Stable' if it is active, and either its Weighted
MTBF is at least the median for known active routers or its Weighted MTBF
corresponds to at least 7 days. Routers are never called Stable if they are
running a version of Tor known to drop circuits stupidly. (0.1.1.10-alpha
through 0.1.1.16-rc are stupid this way.)
To calculate weighted MTBF, compute the weighted mean of the lengths
of all intervals when the router was observed to be up, weighting
@ -991,9 +991,9 @@ $Id$
either in the top 7/8ths for known active routers or at least 100KB/s.
"Guard" -- A router is a possible 'Guard' if its Weighted Fractional
Uptime is at least the median for known active routers, and its bandwidth
is either at least the median for known active routers or at least
250KB/s. If the total bandwidth of active non-BadExit Exit servers is less
Uptime is at least the median for "familiar" active routers, and if
its bandwidth is at least median or at least 250KB/s.
If the total bandwidth of active non-BadExit Exit servers is less
than one third of the total bandwidth of all active servers, no Exit is
listed as a Guard.
@ -1001,6 +1001,9 @@ $Id$
of time that the router is up in any given day, weighting so that
downtime and uptime in the past counts less.
A node is 'familiar' if 1/8 of all active nodes have appeared more
recently than it, OR it has been around for a few weeks.
"Authority" -- A router is called an 'Authority' if the authority
generating the network-status document believes it is an authority.

View File

@ -1214,4 +1214,5 @@ IMPLEMENT_ORDER_FUNC(find_nth_int, int)
IMPLEMENT_ORDER_FUNC(find_nth_time, time_t)
IMPLEMENT_ORDER_FUNC(find_nth_double, double)
IMPLEMENT_ORDER_FUNC(find_nth_uint32, uint32_t)
IMPLEMENT_ORDER_FUNC(find_nth_long, long)

View File

@ -322,6 +322,7 @@ int find_nth_int(int *array, int n_elements, int nth);
time_t find_nth_time(time_t *array, int n_elements, int nth);
double find_nth_double(double *array, int n_elements, int nth);
uint32_t find_nth_uint32(uint32_t *array, int n_elements, int nth);
long find_nth_long(long *array, int n_elements, int nth);
static INLINE int
median_int(int *array, int n_elements)
{
@ -342,6 +343,11 @@ median_uint32(uint32_t *array, int n_elements)
{
return find_nth_uint32(array, n_elements, (n_elements-1)/2);
}
/** Return the median of the <b>n_elements</b> longs in <b>array</b>, taken
 * to be the element that find_nth_long() selects at index
 * (n_elements-1)/2. */
static INLINE long
median_long(long *array, int n_elements)
{
  const int middle = (n_elements-1)/2;
  return find_nth_long(array, n_elements, middle);
}
#endif

View File

@ -1459,14 +1459,22 @@ should_generate_v2_networkstatus(void)
* current guards. */
#define UPTIME_TO_GUARANTEE_STABLE (3600*24*30)
/** If a router's MTBF is at least this value, then it is always stable.
* See above. */
#define MTBF_TO_GUARANTEE_STABLE (60*60*24*10)
* See above. (Corresponds to about 7 days for current decay rates.) */
#define MTBF_TO_GUARANTEE_STABLE (60*60*24*5)
/** Similarly, we protect sufficiently fast nodes from being pushed
* out of the set of Fast nodes. */
#define BANDWIDTH_TO_GUARANTEE_FAST (100*1024)
/** Similarly, every node with sufficient bandwidth can be considered
* for Guard status. */
#define BANDWIDTH_TO_GUARANTEE_GUARD (250*1024)
/** Similarly, every node with at least this much weighted time known can be
* considered familiar enough to be a guard. Corresponds to about 20 days for
* current decay rates.
*/
#define TIME_KNOWN_TO_GUARANTEE_FAMILIAR (8*24*60*60)
/** Similarly, every node with sufficient WFU is around enough to be a guard.
*/
#define WFU_TO_GUARANTEE_GUARD (0.995)
/* Thresholds for server performance: set by
* dirserv_compute_performance_thresholds, and used by
@ -1475,6 +1483,7 @@ static uint32_t stable_uptime = 0; /* start at a safe value */
static double stable_mtbf = 0.0;
static int enough_mtbf_info = 0;
static double guard_wfu = 0.0;
static long guard_tk = 0;
static uint32_t fast_bandwidth = 0;
static uint32_t guard_bandwidth_including_exits = 0;
static uint32_t guard_bandwidth_excluding_exits = 0;
@ -1514,13 +1523,13 @@ dirserv_thinks_router_is_unreliable(time_t now,
} else {
double mtbf =
rep_hist_get_stability(router->cache_info.identity_digest, now);
if (mtbf < stable_mtbf && mtbf < MTBF_TO_GUARANTEE_STABLE)
if (mtbf < stable_mtbf)
return 1;
}
}
if (need_capacity) {
uint32_t bw = router_get_advertised_bandwidth(router);
if (bw < fast_bandwidth && bw < BANDWIDTH_TO_GUARANTEE_FAST)
if (bw < fast_bandwidth)
return 1;
}
return 0;
@ -1550,16 +1559,22 @@ dirserv_thinks_router_is_hs_dir(routerinfo_t *router, time_t now)
static void
dirserv_compute_performance_thresholds(routerlist_t *rl)
{
int n_active, n_active_nonexit;
int n_active, n_active_nonexit, n_familiar;
uint32_t *uptimes, *bandwidths, *bandwidths_excluding_exits;
long *tks;
double *mtbfs, *wfus;
time_t now = time(NULL);
/* DOCDOC this is a little tricky; comment this function better. */
/* initialize these all here, in case there are no routers */
stable_uptime = 0;
stable_mtbf = 0;
fast_bandwidth = 0;
guard_bandwidth_including_exits = 0;
guard_bandwidth_excluding_exits = 0;
guard_tk = 0;
guard_wfu = 0;
total_bandwidth = 0;
total_exit_bandwidth = 0;
@ -1570,6 +1585,7 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
bandwidths_excluding_exits =
tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers));
mtbfs = tor_malloc(sizeof(double)*smartlist_len(rl->routers));
tks = tor_malloc(sizeof(long)*smartlist_len(rl->routers));
wfus = tor_malloc(sizeof(double)*smartlist_len(rl->routers));
SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, {
@ -1579,7 +1595,7 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
ri->is_exit = exit_policy_is_general_exit(ri->exit_policy);
uptimes[n_active] = real_uptime(ri, now);
mtbfs[n_active] = rep_hist_get_stability(id, now);
wfus [n_active] = rep_hist_get_weighted_fractional_uptime(id, now);
tks [n_active] = rep_hist_get_weighted_time_known(id, now);
bandwidths[n_active] = bw = router_get_advertised_bandwidth(ri);
total_bandwidth += bw;
if (ri->is_exit && !ri->is_bad_exit) {
@ -1595,14 +1611,35 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
if (n_active) {
stable_uptime = median_uint32(uptimes, n_active);
stable_mtbf = median_double(mtbfs, n_active);
guard_wfu = median_double(wfus, n_active);
fast_bandwidth = find_nth_uint32(bandwidths, n_active, n_active/8);
/* Now bandwidths is sorted. */
if (fast_bandwidth < ROUTER_REQUIRED_MIN_BANDWIDTH)
fast_bandwidth = bandwidths[n_active/4];
guard_bandwidth_including_exits = bandwidths[(n_active-1)/2];
guard_tk = find_nth_long(tks, n_active, n_active/8);
}
if (guard_tk > TIME_KNOWN_TO_GUARANTEE_FAMILIAR)
guard_tk = TIME_KNOWN_TO_GUARANTEE_FAMILIAR;
if (fast_bandwidth > BANDWIDTH_TO_GUARANTEE_FAST)
fast_bandwidth = BANDWIDTH_TO_GUARANTEE_FAST;
n_familiar = 0;
SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, {
if (router_is_active(ri, now)) {
const char *id = ri->cache_info.identity_digest;
long tk = rep_hist_get_weighted_time_known(id, now);
if (tk < guard_tk)
continue;
wfus[n_familiar++] = rep_hist_get_weighted_fractional_uptime(id, now);
}
});
if (n_familiar)
guard_wfu = median_double(wfus, n_familiar);
if (guard_wfu > WFU_TO_GUARANTEE_GUARD)
guard_wfu = WFU_TO_GUARANTEE_GUARD;
enough_mtbf_info = rep_hist_have_measured_enough_stability();
if (n_active_nonexit) {
@ -1610,19 +1647,25 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
median_uint32(bandwidths_excluding_exits, n_active_nonexit);
}
/*XXXX020 Log the other stuff too. */
log(LOG_INFO, LD_DIRSERV,
"Cutoffs: %lus uptime, %lu b/s fast, %lu or %lu b/s guard.",
"Cutoffs: For Stable, %lu sec uptime, %lu sec MTBF. "
"For Fast: %lu bytes/sec."
"For Guard: WFU %.03lf%%, time-known %lu sec, "
"and bandwidth %lu or %lu bytes/sec.",
(unsigned long)stable_uptime,
(unsigned long)stable_mtbf,
(unsigned long)fast_bandwidth,
guard_wfu*100,
(unsigned long)guard_tk,
(unsigned long)guard_bandwidth_including_exits,
(unsigned long)guard_bandwidth_excluding_exits);
tor_free(uptimes);
tor_free(mtbfs);
tor_free(wfus);
tor_free(bandwidths);
tor_free(bandwidths_excluding_exits);
tor_free(tks);
tor_free(wfus);
}
/** Given a platform string as in a routerinfo_t (possibly null), return a
@ -1852,9 +1895,11 @@ set_routerstatus_from_routerinfo(routerstatus_t *rs,
router_get_advertised_bandwidth(ri) >=
(exits_can_be_guards ? guard_bandwidth_including_exits :
guard_bandwidth_excluding_exits))) {
long tk = rep_hist_get_weighted_time_known(
ri->cache_info.identity_digest, now);
double wfu = rep_hist_get_weighted_fractional_uptime(
ri->cache_info.identity_digest, now);
rs->is_possible_guard = (wfu >= guard_wfu) ? 1 : 0;
rs->is_possible_guard = (wfu >= guard_wfu && tk >= guard_tk) ? 1 : 0;
} else {
rs->is_possible_guard = 0;
}

View File

@ -3441,6 +3441,7 @@ int rep_hist_load_mtbf_data(time_t now);
time_t rep_hist_downrate_old_runs(time_t now);
double rep_hist_get_stability(const char *id, time_t when);
double rep_hist_get_weighted_fractional_uptime(const char *id, time_t when);
long rep_hist_get_weighted_time_known(const char *id, time_t when);
int rep_hist_have_measured_enough_stability(void);
void rep_hist_note_used_port(uint16_t port, time_t now);

View File

@ -384,8 +384,18 @@ get_stability(or_history_t *hist, time_t when)
return total / total_weights;
}
/* Until we've known about you for this long, you simply can't be up. */
#define MIN_WEIGHTED_TIME_TO_BE_UP (18*60*60)
/** Helper: Return the total weighted time recorded in <b>hist</b> as of
 * <b>when</b>.  This is hist->total_weighted_time, plus the time elapsed
 * between hist->start_of_run and <b>when</b> if start_of_run is set, or
 * otherwise the time elapsed between hist->start_of_downtime and
 * <b>when</b> if start_of_downtime is set. */
static long
get_total_weighted_time(or_history_t *hist, time_t when)
{
  long weighted = hist->total_weighted_time;

  if (hist->start_of_run) {
    /* start_of_run takes precedence over start_of_downtime. */
    weighted += (when - hist->start_of_run);
    return weighted;
  }
  if (hist->start_of_downtime)
    weighted += (when - hist->start_of_downtime);
  return weighted;
}
/** Helper: Return the weighted percent-of-time-online of the router with
* history <b>hist</b>. */
@ -402,8 +412,6 @@ get_weighted_fractional_uptime(or_history_t *hist, time_t when)
} else if (hist->start_of_downtime) {
total += (when - hist->start_of_downtime);
}
if (total < MIN_WEIGHTED_TIME_TO_BE_UP)
return 0.0;
return ((double) up) / total;
}
@ -431,6 +439,22 @@ rep_hist_get_weighted_fractional_uptime(const char *id, time_t when)
return get_weighted_fractional_uptime(hist, when);
}
/** Return a number representing how long we've known about the router whose
 * digest is <b>id</b>, as of <b>when</b>.  Return 0 if we have no history
 * for that router.
 *
 * Be careful: this measure increases monotonically as we know the router for
 * longer and longer, but it doesn't increase linearly.
 */
long
rep_hist_get_weighted_time_known(const char *id, time_t when)
{
  or_history_t *hist = get_or_history(id);
  return hist ? get_total_weighted_time(hist, when) : 0;
}
/** Return true if we've been measuring MTBFs for long enough to
* pronounce on Stability. */
int