r14448@Kushana: nickm | 2007-09-17 14:26:56 -0400

Unify all of the divergent median/nth-percentile code in container.[ch]


svn:r11457
This commit is contained in:
Nick Mathewson 2007-09-17 18:27:49 +00:00
parent 93d4ad9743
commit 8c13967803
7 changed files with 139 additions and 177 deletions

View File

@ -45,6 +45,7 @@ Changes in version 0.2.0.7-alpha - 2007-??-??
of a file in memory at once before we write to disk. Tor, meet stdio.
- Turn "descriptor store" into a full-fledged type.
- Move all NT services code into a separate source file.
- Unify all code that computes medians, percentile elements, etc.
Changes in version 0.1.2.17 - 2007-08-30

View File

@ -1133,3 +1133,35 @@ digestmap_size(const digestmap_t *map)
return HT_SIZE(&map->head);
}
/** declare a function called <b>funcname</b> that acts as a find_nth_XXX
* function for an array of type <b>elt_t</b>*.
*
* NOTE: The implementation kind of sucks: It's O(n log n), whereas finding
* the nth element of a list can be done in O(n). Then again, this
* implementation is not in critical path, and it is obviously correct. */
#define IMPLEMENT_ORDER_FUNC(funcname, elt_t) \
static int \
_cmp_ ## elt_t(const void *_a, const void *_b) \
{ \
const elt_t *a = _a, *b = _b; \
if (*a<*b) \
return -1; \
else if (*a>*b) \
return 1; \
else \
return 0; \
} \
elt_t \
funcname(elt_t *array, int n_elements, int nth) \
{ \
tor_assert(nth >= 0); \
tor_assert(nth < n_elements); \
qsort(array, n_elements, sizeof(elt_t), _cmp_ ##elt_t); \
return array[nth]; \
}
IMPLEMENT_ORDER_FUNC(find_nth_int, int)
IMPLEMENT_ORDER_FUNC(find_nth_time, time_t)
IMPLEMENT_ORDER_FUNC(find_nth_double, double)
IMPLEMENT_ORDER_FUNC(find_nth_uint32, uint32_t)

View File

@ -310,5 +310,34 @@ bitarray_is_set(bitarray_t *b, int bit)
return b[bit >> BITARRAY_SHIFT] & (1u << (bit & BITARRAY_MASK));
}
/* These functions, given an <b>array</b> of <b>n_elements</b>, return the
* <b>nth</b> lowest element. <b>nth</b>=0 gives the lowest element;
* <b>n_elements</b>-1 gives the highest; and (<b>n_elements</b>-1) / 2 gives
* the median. As a side effect, the elements of <b>array</b> are sorted. */
int find_nth_int(int *array, int n_elements, int nth);
time_t find_nth_time(time_t *array, int n_elements, int nth);
double find_nth_double(double *array, int n_elements, int nth);
uint32_t find_nth_uint32(uint32_t *array, int n_elements, int nth);
static INLINE int
median_int(int *array, int n_elements)
{
return find_nth_int(array, n_elements, (n_elements-1)/2);
}
static INLINE time_t
median_time(time_t *array, int n_elements)
{
return find_nth_time(array, n_elements, (n_elements-1)/2);
}
static INLINE double
median_double(double *array, int n_elements)
{
return find_nth_double(array, n_elements, (n_elements-1)/2);
}
static INLINE uint32_t
median_uint32(uint32_t *array, int n_elements)
{
return find_nth_uint32(array, n_elements, (n_elements-1)/2);
}
#endif

View File

@ -1569,28 +1569,6 @@ dirserv_thinks_router_is_unreliable(time_t now,
return 0;
}
/** Helper: returns a tristate based on comparing **(uint32_t**)<b>a</b>
* to **(uint32_t**)<b>b</b>. */
static int
_compare_uint32(const void **a, const void **b)
{
uint32_t first = **(uint32_t **)a, second = **(uint32_t **)b;
if (first < second) return -1;
if (first > second) return 1;
return 0;
}
/** Helper: returns a tristate based on comparing **(double**)<b>a</b>
* to **(double**)<b>b</b>. */
static int
_compare_double(const void **a, const void **b)
{
double first = **(double **)a, second = **(double **)b;
if (first < second) return -1;
if (first > second) return 1;
return 0;
}
/** Look through the routerlist, and assign the median uptime of running valid
* servers to stable_uptime, and the relative bandwidth capacities to
* fast_bandwidth and guard_bandwidth. Set total_bandwidth to the total
@ -1600,7 +1578,9 @@ _compare_double(const void **a, const void **b)
static void
dirserv_compute_performance_thresholds(routerlist_t *rl)
{
smartlist_t *uptimes, *mtbfs, *bandwidths, *bandwidths_excluding_exits;
int n_active, n_active_nonexit;
uint32_t *uptimes, *bandwidths, *bandwidths_excluding_exits;
double *mtbfs;
time_t now = time(NULL);
/* initialize these all here, in case there are no routers */
@ -1612,63 +1592,48 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
total_bandwidth = 0;
total_exit_bandwidth = 0;
uptimes = smartlist_create();
mtbfs = smartlist_create();
bandwidths = smartlist_create();
bandwidths_excluding_exits = smartlist_create();
n_active = n_active_nonexit = 0;
uptimes = tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers));
bandwidths = tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers));
bandwidths_excluding_exits =
tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers));
mtbfs = tor_malloc(sizeof(double)*smartlist_len(rl->routers));
/* XXXX020 we should just use arrays and qsort. */
SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, {
if (router_is_active(ri, now)) {
uint32_t *up = tor_malloc(sizeof(uint32_t));
uint32_t *bw = tor_malloc(sizeof(uint32_t));
uint32_t *mtbf = tor_malloc(sizeof(double));
const char *id = ri->cache_info.identity_digest;
uint32_t bw;
ri->is_exit = exit_policy_is_general_exit(ri->exit_policy);
*up = (uint32_t) real_uptime(ri, now);
smartlist_add(uptimes, up);
*mtbf = rep_hist_get_stability(ri->cache_info.identity_digest, now);
smartlist_add(mtbfs, mtbf);
*bw = router_get_advertised_bandwidth(ri);
total_bandwidth += *bw;
uptimes[n_active] = real_uptime(ri, now);
mtbfs[n_active] = rep_hist_get_stability(id, now);
bandwidths[n_active] = bw = router_get_advertised_bandwidth(ri);
total_bandwidth += bw;
if (ri->is_exit && !ri->is_bad_exit) {
total_exit_bandwidth += *bw;
total_exit_bandwidth += bw;
} else {
uint32_t *bw_not_exit = tor_malloc(sizeof(uint32_t));
*bw_not_exit = *bw;
smartlist_add(bandwidths_excluding_exits, bw_not_exit);
bandwidths_excluding_exits[n_active_nonexit] = bw;
++n_active_nonexit;
}
smartlist_add(bandwidths, bw);
++n_active;
}
});
smartlist_sort(uptimes, _compare_uint32);
smartlist_sort(mtbfs, _compare_double);
smartlist_sort(bandwidths, _compare_uint32);
smartlist_sort(bandwidths_excluding_exits, _compare_uint32);
if (smartlist_len(uptimes))
stable_uptime = *(uint32_t*)smartlist_get(uptimes,
smartlist_len(uptimes)/2);
if (smartlist_len(mtbfs))
stable_mtbf = *(double*)smartlist_get(mtbfs,
smartlist_len(mtbfs)/2);
enough_mtbf_info = rep_hist_have_measured_enough_stability();
if (smartlist_len(bandwidths)) {
fast_bandwidth = *(uint32_t*)smartlist_get(bandwidths,
smartlist_len(bandwidths)/8);
if (n_active) {
stable_uptime = median_uint32(uptimes, n_active);
stable_mtbf = median_double(mtbfs, n_active);
fast_bandwidth = find_nth_uint32(bandwidths, n_active, n_active/8);
/* Now bandwidths is sorted. */
if (fast_bandwidth < ROUTER_REQUIRED_MIN_BANDWIDTH)
fast_bandwidth = *(uint32_t*)smartlist_get(bandwidths,
smartlist_len(bandwidths)/4);
guard_bandwidth_including_exits =
*(uint32_t*)smartlist_get(bandwidths, smartlist_len(bandwidths)/2);
fast_bandwidth = bandwidths[n_active/4];
guard_bandwidth_including_exits = bandwidths[(n_active-1)/2];
}
if (smartlist_len(bandwidths_excluding_exits)) {
enough_mtbf_info = rep_hist_have_measured_enough_stability();
if (n_active_nonexit) {
guard_bandwidth_excluding_exits =
*(uint32_t*)smartlist_get(bandwidths_excluding_exits,
smartlist_len(bandwidths_excluding_exits)/2);
median_uint32(bandwidths_excluding_exits, n_active_nonexit);
}
log(LOG_INFO, LD_DIRSERV,
@ -1678,14 +1643,10 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
(unsigned long)guard_bandwidth_including_exits,
(unsigned long)guard_bandwidth_excluding_exits);
SMARTLIST_FOREACH(uptimes, uint32_t *, up, tor_free(up));
SMARTLIST_FOREACH(mtbfs, double *, mtbf, tor_free(mtbf));
SMARTLIST_FOREACH(bandwidths, uint32_t *, bw, tor_free(bw));
SMARTLIST_FOREACH(bandwidths_excluding_exits, uint32_t *, bw, tor_free(bw));
smartlist_free(uptimes);
smartlist_free(mtbfs);
smartlist_free(bandwidths);
smartlist_free(bandwidths_excluding_exits);
tor_free(uptimes);
tor_free(mtbfs);
tor_free(bandwidths);
tor_free(bandwidths_excluding_exits);
}
/** Given a platform string as in a routerinfo_t (possibly null), return a

View File

@ -81,54 +81,6 @@ networkstatus_get_voter_by_id(networkstatus_vote_t *vote,
return NULL;
}
/** Helper for sorting a list of time_t*. */
static int
_compare_times(const void **_a, const void **_b)
{
const time_t *a = *_a, *b = *_b;
if (*a<*b)
return -1;
else if (*a>*b)
return 1;
else
return 0;
}
/** Helper for sorting a list of int*. */
static int
_compare_ints(const void **_a, const void **_b)
{
const int *a = *_a, *b = *_b;
if (*a<*b)
return -1;
else if (*a>*b)
return 1;
else
return 0;
}
/** Given a list of one or more time_t*, return the (low) median. */
/*static*/ time_t
median_time(smartlist_t *times)
{
int idx;
tor_assert(smartlist_len(times));
smartlist_sort(times, _compare_times);
idx = (smartlist_len(times)-1)/2;
return *(time_t*)smartlist_get(times, idx);
}
/** Given a list of one or more int*, return the (low) median. */
/*static*/ int
median_int(smartlist_t *ints)
{
int idx;
tor_assert(smartlist_len(ints));
smartlist_sort(ints, _compare_ints);
idx = (smartlist_len(ints)-1)/2;
return *(int*)smartlist_get(ints, idx);
}
/** Given a vote <b>vote</b> (not a consensus!), return its associated
* networkstatus_voter_info_t.*/
static networkstatus_voter_info_t *
@ -322,11 +274,12 @@ networkstatus_compute_consensus(smartlist_t *votes,
/* Compute medians of time-related things, and figure out how many
* routers we might need to talk about. */
{
smartlist_t *va_times = smartlist_create();
smartlist_t *fu_times = smartlist_create();
smartlist_t *vu_times = smartlist_create();
smartlist_t *votesec_list = smartlist_create();
smartlist_t *distsec_list = smartlist_create();
int n_votes = smartlist_len(votes);
time_t *va_times = tor_malloc(n_votes * sizeof(time_t));
time_t *fu_times = tor_malloc(n_votes * sizeof(time_t));
time_t *vu_times = tor_malloc(n_votes * sizeof(time_t));
int *votesec_list = tor_malloc(n_votes * sizeof(int));
int *distsec_list = tor_malloc(n_votes * sizeof(int));
int n_versioning_clients = 0, n_versioning_servers = 0;
smartlist_t *combined_client_versions = smartlist_create();
smartlist_t *combined_server_versions = smartlist_create();
@ -334,11 +287,11 @@ networkstatus_compute_consensus(smartlist_t *votes,
SMARTLIST_FOREACH(votes, networkstatus_vote_t *, v,
{
tor_assert(v->is_vote);
smartlist_add(va_times, &v->valid_after);
smartlist_add(fu_times, &v->fresh_until);
smartlist_add(vu_times, &v->valid_until);
smartlist_add(votesec_list, &v->vote_seconds);
smartlist_add(distsec_list, &v->dist_seconds);
va_times[v_sl_idx] = v->valid_after;
fu_times[v_sl_idx] = v->fresh_until;
vu_times[v_sl_idx] = v->valid_until;
votesec_list[v_sl_idx] = v->vote_seconds;
distsec_list[v_sl_idx] = v->dist_seconds;
if (v->client_versions) {
smartlist_t *cv = smartlist_create();
++n_versioning_clients;
@ -360,11 +313,11 @@ networkstatus_compute_consensus(smartlist_t *votes,
SMARTLIST_FOREACH(v->known_flags, const char *, cp,
smartlist_add(flags, tor_strdup(cp)));
});
valid_after = median_time(va_times);
fresh_until = median_time(fu_times);
valid_until = median_time(vu_times);
vote_seconds = median_int(votesec_list);
dist_seconds = median_int(distsec_list);
valid_after = median_time(va_times, n_votes);
fresh_until = median_time(fu_times, n_votes);
valid_until = median_time(vu_times, n_votes);
vote_seconds = median_int(votesec_list, n_votes);
dist_seconds = median_int(distsec_list, n_votes);
/*
SMARTLIST_FOREACH(va_times, int*, i,
@ -400,11 +353,11 @@ networkstatus_compute_consensus(smartlist_t *votes,
smartlist_sort_strings(flags);
smartlist_uniq_strings(flags);
smartlist_free(va_times);
smartlist_free(fu_times);
smartlist_free(vu_times);
smartlist_free(votesec_list);
smartlist_free(distsec_list);
tor_free(va_times);
tor_free(fu_times);
tor_free(vu_times);
tor_free(votesec_list);
tor_free(distsec_list);
}
chunks = smartlist_create();

View File

@ -2932,8 +2932,6 @@ const char *dirvote_get_pending_detached_signatures(void);
const cached_dir_t *dirvote_get_vote(const char *id);
#ifdef DIRVOTE_PRIVATE
time_t median_time(smartlist_t *times);
int median_int(smartlist_t *times);
int networkstatus_check_voter_signature(networkstatus_vote_t *consensus,
networkstatus_voter_info_t *voter,
authority_cert_t *cert);
@ -3184,6 +3182,7 @@ int rep_hist_load_mtbf_data(time_t now);
time_t rep_hist_downrate_old_runs(time_t now);
double rep_hist_get_stability(const char *id, time_t when);
double rep_hist_get_weighted_fractional_uptime(const char *id, time_t when);
int rep_hist_have_measured_enough_stability(void);
void rep_hist_note_used_port(uint16_t port, time_t now);

View File

@ -2386,42 +2386,29 @@ test_same_voter(networkstatus_voter_info_t *v1,
}
static void
test_util_dirvote_helpers(void)
test_util_order_functions(void)
{
smartlist_t *sl = smartlist_create();
int a=12,b=24,c=25,d=60,e=77;
time_t v=99, w=150, x=700, y=1000, z=time(NULL);
int lst[25], n = 0;
// int a=12,b=24,c=25,d=60,e=77;
test_assert(y<z);
smartlist_add(sl, &a);
test_eq(a, median_int(sl)); /* a */
smartlist_add(sl, &e);
smartlist_shuffle(sl);
test_eq(a, median_int(sl)); /* a,e */
smartlist_add(sl, &e);
smartlist_shuffle(sl);
test_eq(e, median_int(sl)); /* a,e,e */
smartlist_add(sl, &b);
test_eq(b, median_int(sl)); /* a,b,e,e */
smartlist_add(sl, &d);
smartlist_add(sl, &a);
smartlist_add(sl, &c);
smartlist_shuffle(sl);
test_eq(c, median_int(sl)); /* a,a,b,c,d,e,e */
#define median() median_int(lst, n)
smartlist_clear(sl);
smartlist_add(sl, &y);
test_eq(y, median_time(sl)); /*y*/
smartlist_add(sl, &w);
test_eq(w, median_time(sl)); /*w,y*/
smartlist_add(sl, &x);
test_eq(x, median_time(sl)); /*w,x,y*/
smartlist_add(sl, &v);
test_eq(w, median_time(sl)); /*v,w,x,y*/
smartlist_add(sl, &z);
test_eq(x, median_time(sl)); /*v,w,x,y,z*/
smartlist_free(sl);
lst[n++] = 12;
test_eq(12, median()); /* 12 */
lst[n++] = 77;
//smartlist_shuffle(sl);
test_eq(12, median()); /* 12, 77 */
lst[n++] = 77;
//smartlist_shuffle(sl);
test_eq(77, median()); /* 12, 77, 77 */
lst[n++] = 24;
test_eq(24, median()); /* 12,24,77,77 */
lst[n++] = 60;
lst[n++] = 12;
lst[n++] = 25;
//smartlist_shuffle(sl);
test_eq(25, median()); /* 12,12,24,25,60,77,77 */
#undef median
}
static void
@ -3121,7 +3108,7 @@ static struct {
SUBENT(util, pqueue),
SUBENT(util, mmap),
SUBENT(util, threads),
SUBENT(util, dirvote_helpers),
SUBENT(util, order_functions),
ENT(onion_handshake),
ENT(dir_format),
ENT(v3_networkstatus),