From 8c139678038832cf982ab2ef3a25679ff677c491 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 17 Sep 2007 18:27:49 +0000 Subject: [PATCH] r14448@Kushana: nickm | 2007-09-17 14:26:56 -0400 Unify all of the divergent median/nth-percentile code in container.[ch] svn:r11457 --- ChangeLog | 1 + src/common/container.c | 32 ++++++++++++ src/common/container.h | 29 +++++++++++ src/or/dirserv.c | 107 +++++++++++++---------------------------- src/or/dirvote.c | 89 ++++++++-------------------------- src/or/or.h | 3 +- src/or/test.c | 55 ++++++++------------- 7 files changed, 139 insertions(+), 177 deletions(-) diff --git a/ChangeLog b/ChangeLog index 102572b20a..80fd384459 100644 --- a/ChangeLog +++ b/ChangeLog @@ -45,6 +45,7 @@ Changes in version 0.2.0.7-alpha - 2007-??-?? of a file in memory at once before we write to disk. Tor, meet stdio. - Turn "descriptor store" into a full-fledged type. - Move all NT services code into a separate source file. + - Unify all code that computes medians, percentile elements, etc. Changes in version 0.1.2.17 - 2007-08-30 diff --git a/src/common/container.c b/src/common/container.c index a148af2663..d8b910738c 100644 --- a/src/common/container.c +++ b/src/common/container.c @@ -1133,3 +1133,35 @@ digestmap_size(const digestmap_t *map) return HT_SIZE(&map->head); } +/** declare a function called funcname that acts as a find_nth_XXX + * function for an array of type elt_t*. + * + * NOTE: The implementation kind of sucks: It's O(n log n), whereas finding + * the nth element of a list can be done in O(n). Then again, this + * implementation is not in critical path, and it is obviously correct. */ +#define IMPLEMENT_ORDER_FUNC(funcname, elt_t) \ + static int \ + _cmp_ ## elt_t(const void *_a, const void *_b) \ + { \ + const elt_t *a = _a, *b = _b; \ + if (*a<*b) \ + return -1; \ + else if (*a>*b) \ + return 1; \ + else \ + return 0; \ + } \ + elt_t \ + funcname(elt_t *array, int n_elements, int nth) \ + { \ + tor_assert(nth >= 0); \ + tor_assert(nth < n_elements); \ + qsort(array, n_elements, sizeof(elt_t), _cmp_ ##elt_t); \ + return array[nth]; \ + } + +IMPLEMENT_ORDER_FUNC(find_nth_int, int) +IMPLEMENT_ORDER_FUNC(find_nth_time, time_t) +IMPLEMENT_ORDER_FUNC(find_nth_double, double) +IMPLEMENT_ORDER_FUNC(find_nth_uint32, uint32_t) + diff --git a/src/common/container.h b/src/common/container.h index 668befdd6e..1f58b30557 100644 --- a/src/common/container.h +++ b/src/common/container.h @@ -310,5 +310,34 @@ bitarray_is_set(bitarray_t *b, int bit) return b[bit >> BITARRAY_SHIFT] & (1u << (bit & BITARRAY_MASK)); } +/* These functions, given an array of n_elements, return the + * nth lowest element. nth=0 gives the lowest element; + * n_elements-1 gives the highest; and (n_elements-1) / 2 gives + * the median. As a side effect, the elements of array are sorted. */ +int find_nth_int(int *array, int n_elements, int nth); +time_t find_nth_time(time_t *array, int n_elements, int nth); +double find_nth_double(double *array, int n_elements, int nth); +uint32_t find_nth_uint32(uint32_t *array, int n_elements, int nth); +static INLINE int +median_int(int *array, int n_elements) +{ + return find_nth_int(array, n_elements, (n_elements-1)/2); +} +static INLINE time_t +median_time(time_t *array, int n_elements) +{ + return find_nth_time(array, n_elements, (n_elements-1)/2); +} +static INLINE double +median_double(double *array, int n_elements) +{ + return find_nth_double(array, n_elements, (n_elements-1)/2); +} +static INLINE uint32_t +median_uint32(uint32_t *array, int n_elements) +{ + return find_nth_uint32(array, n_elements, (n_elements-1)/2); +} + #endif diff --git a/src/or/dirserv.c b/src/or/dirserv.c index 6975c19887..50bdc1267b 100644 --- a/src/or/dirserv.c +++ b/src/or/dirserv.c @@ -1569,28 +1569,6 @@ dirserv_thinks_router_is_unreliable(time_t now, return 0; } -/** Helper: returns a tristate based on comparing **(uint32_t**)a - * to **(uint32_t**)b. */ -static int -_compare_uint32(const void **a, const void **b) -{ - uint32_t first = **(uint32_t **)a, second = **(uint32_t **)b; - if (first < second) return -1; - if (first > second) return 1; - return 0; -} - -/** Helper: returns a tristate based on comparing **(double**)a - * to **(double**)b. */ -static int -_compare_double(const void **a, const void **b) -{ - double first = **(double **)a, second = **(double **)b; - if (first < second) return -1; - if (first > second) return 1; - return 0; -} - /** Look through the routerlist, and assign the median uptime of running valid * servers to stable_uptime, and the relative bandwidth capacities to * fast_bandwidth and guard_bandwidth. Set total_bandwidth to the total @@ -1600,7 +1578,9 @@ _compare_double(const void **a, const void **b) static void dirserv_compute_performance_thresholds(routerlist_t *rl) { - smartlist_t *uptimes, *mtbfs, *bandwidths, *bandwidths_excluding_exits; + int n_active, n_active_nonexit; + uint32_t *uptimes, *bandwidths, *bandwidths_excluding_exits; + double *mtbfs; time_t now = time(NULL); /* initialize these all here, in case there are no routers */ @@ -1612,63 +1592,48 @@ dirserv_compute_performance_thresholds(routerlist_t *rl) total_bandwidth = 0; total_exit_bandwidth = 0; - uptimes = smartlist_create(); - mtbfs = smartlist_create(); - bandwidths = smartlist_create(); - bandwidths_excluding_exits = smartlist_create(); + n_active = n_active_nonexit = 0; + uptimes = tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers)); + bandwidths = tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers)); + bandwidths_excluding_exits = + tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers)); + mtbfs = tor_malloc(sizeof(double)*smartlist_len(rl->routers)); /* XXXX020 we should just use arrays and qsort. */ SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, { if (router_is_active(ri, now)) { - uint32_t *up = tor_malloc(sizeof(uint32_t)); - uint32_t *bw = tor_malloc(sizeof(uint32_t)); - uint32_t *mtbf = tor_malloc(sizeof(double)); + const char *id = ri->cache_info.identity_digest; + uint32_t bw; ri->is_exit = exit_policy_is_general_exit(ri->exit_policy); - *up = (uint32_t) real_uptime(ri, now); - smartlist_add(uptimes, up); - *mtbf = rep_hist_get_stability(ri->cache_info.identity_digest, now); - smartlist_add(mtbfs, mtbf); - *bw = router_get_advertised_bandwidth(ri); - total_bandwidth += *bw; + uptimes[n_active] = real_uptime(ri, now); + mtbfs[n_active] = rep_hist_get_stability(id, now); + bandwidths[n_active] = bw = router_get_advertised_bandwidth(ri); + total_bandwidth += bw; if (ri->is_exit && !ri->is_bad_exit) { - total_exit_bandwidth += *bw; + total_exit_bandwidth += bw; } else { - uint32_t *bw_not_exit = tor_malloc(sizeof(uint32_t)); - *bw_not_exit = *bw; - smartlist_add(bandwidths_excluding_exits, bw_not_exit); + bandwidths_excluding_exits[n_active_nonexit] = bw; + ++n_active_nonexit; } - smartlist_add(bandwidths, bw); + ++n_active; } }); - smartlist_sort(uptimes, _compare_uint32); - smartlist_sort(mtbfs, _compare_double); - smartlist_sort(bandwidths, _compare_uint32); - smartlist_sort(bandwidths_excluding_exits, _compare_uint32); - - if (smartlist_len(uptimes)) - stable_uptime = *(uint32_t*)smartlist_get(uptimes, - smartlist_len(uptimes)/2); - - if (smartlist_len(mtbfs)) - stable_mtbf = *(double*)smartlist_get(mtbfs, - smartlist_len(mtbfs)/2); - enough_mtbf_info = rep_hist_have_measured_enough_stability(); - - if (smartlist_len(bandwidths)) { - fast_bandwidth = *(uint32_t*)smartlist_get(bandwidths, - smartlist_len(bandwidths)/8); + if (n_active) { + stable_uptime = median_uint32(uptimes, n_active); + stable_mtbf = median_double(mtbfs, n_active); + fast_bandwidth = find_nth_uint32(bandwidths, n_active, n_active/8); + /* Now bandwidths is sorted. */ if (fast_bandwidth < ROUTER_REQUIRED_MIN_BANDWIDTH) - fast_bandwidth = *(uint32_t*)smartlist_get(bandwidths, - smartlist_len(bandwidths)/4); - guard_bandwidth_including_exits = - *(uint32_t*)smartlist_get(bandwidths, smartlist_len(bandwidths)/2); + fast_bandwidth = bandwidths[n_active/4]; + guard_bandwidth_including_exits = bandwidths[(n_active-1)/2]; } - if (smartlist_len(bandwidths_excluding_exits)) { + enough_mtbf_info = rep_hist_have_measured_enough_stability(); + + if (n_active_nonexit) { guard_bandwidth_excluding_exits = - *(uint32_t*)smartlist_get(bandwidths_excluding_exits, - smartlist_len(bandwidths_excluding_exits)/2); + median_uint32(bandwidths_excluding_exits, n_active_nonexit); } log(LOG_INFO, LD_DIRSERV, @@ -1678,14 +1643,10 @@ dirserv_compute_performance_thresholds(routerlist_t *rl) (unsigned long)guard_bandwidth_including_exits, (unsigned long)guard_bandwidth_excluding_exits); - SMARTLIST_FOREACH(uptimes, uint32_t *, up, tor_free(up)); - SMARTLIST_FOREACH(mtbfs, double *, mtbf, tor_free(mtbf)); - SMARTLIST_FOREACH(bandwidths, uint32_t *, bw, tor_free(bw)); - SMARTLIST_FOREACH(bandwidths_excluding_exits, uint32_t *, bw, tor_free(bw)); - smartlist_free(uptimes); - smartlist_free(mtbfs); - smartlist_free(bandwidths); - smartlist_free(bandwidths_excluding_exits); + tor_free(uptimes); + tor_free(mtbfs); + tor_free(bandwidths); + tor_free(bandwidths_excluding_exits); } /** Given a platform string as in a routerinfo_t (possibly null), return a diff --git a/src/or/dirvote.c b/src/or/dirvote.c index 463ecef3a9..938450bbcd 100644 --- a/src/or/dirvote.c +++ b/src/or/dirvote.c @@ -81,54 +81,6 @@ networkstatus_get_voter_by_id(networkstatus_vote_t *vote, return NULL; } -/** Helper for sorting a list of time_t*. */ -static int -_compare_times(const void **_a, const void **_b) -{ - const time_t *a = *_a, *b = *_b; - if (*a<*b) - return -1; - else if (*a>*b) - return 1; - else - return 0; -} - -/** Helper for sorting a list of int*. */ -static int -_compare_ints(const void **_a, const void **_b) -{ - const int *a = *_a, *b = *_b; - if (*a<*b) - return -1; - else if (*a>*b) - return 1; - else - return 0; -} - -/** Given a list of one or more time_t*, return the (low) median. */ -/*static*/ time_t -median_time(smartlist_t *times) -{ - int idx; - tor_assert(smartlist_len(times)); - smartlist_sort(times, _compare_times); - idx = (smartlist_len(times)-1)/2; - return *(time_t*)smartlist_get(times, idx); -} - -/** Given a list of one or more int*, return the (low) median. */ -/*static*/ int -median_int(smartlist_t *ints) -{ - int idx; - tor_assert(smartlist_len(ints)); - smartlist_sort(ints, _compare_ints); - idx = (smartlist_len(ints)-1)/2; - return *(int*)smartlist_get(ints, idx); -} - /** Given a vote vote (not a consensus!), return its associated * networkstatus_voter_info_t.*/ static networkstatus_voter_info_t * @@ -322,11 +274,12 @@ networkstatus_compute_consensus(smartlist_t *votes, /* Compute medians of time-related things, and figure out how many * routers we might need to talk about. */ { - smartlist_t *va_times = smartlist_create(); - smartlist_t *fu_times = smartlist_create(); - smartlist_t *vu_times = smartlist_create(); - smartlist_t *votesec_list = smartlist_create(); - smartlist_t *distsec_list = smartlist_create(); + int n_votes = smartlist_len(votes); + time_t *va_times = tor_malloc(n_votes * sizeof(time_t)); + time_t *fu_times = tor_malloc(n_votes * sizeof(time_t)); + time_t *vu_times = tor_malloc(n_votes * sizeof(time_t)); + int *votesec_list = tor_malloc(n_votes * sizeof(int)); + int *distsec_list = tor_malloc(n_votes * sizeof(int)); int n_versioning_clients = 0, n_versioning_servers = 0; smartlist_t *combined_client_versions = smartlist_create(); smartlist_t *combined_server_versions = smartlist_create(); @@ -334,11 +287,11 @@ networkstatus_compute_consensus(smartlist_t *votes, SMARTLIST_FOREACH(votes, networkstatus_vote_t *, v, { tor_assert(v->is_vote); - smartlist_add(va_times, &v->valid_after); - smartlist_add(fu_times, &v->fresh_until); - smartlist_add(vu_times, &v->valid_until); - smartlist_add(votesec_list, &v->vote_seconds); - smartlist_add(distsec_list, &v->dist_seconds); + va_times[v_sl_idx] = v->valid_after; + fu_times[v_sl_idx] = v->fresh_until; + vu_times[v_sl_idx] = v->valid_until; + votesec_list[v_sl_idx] = v->vote_seconds; + distsec_list[v_sl_idx] = v->dist_seconds; if (v->client_versions) { smartlist_t *cv = smartlist_create(); ++n_versioning_clients; @@ -360,11 +313,11 @@ networkstatus_compute_consensus(smartlist_t *votes, SMARTLIST_FOREACH(v->known_flags, const char *, cp, smartlist_add(flags, tor_strdup(cp))); }); - valid_after = median_time(va_times); - fresh_until = median_time(fu_times); - valid_until = median_time(vu_times); - vote_seconds = median_int(votesec_list); - dist_seconds = median_int(distsec_list); + valid_after = median_time(va_times, n_votes); + fresh_until = median_time(fu_times, n_votes); + valid_until = median_time(vu_times, n_votes); + vote_seconds = median_int(votesec_list, n_votes); + dist_seconds = median_int(distsec_list, n_votes); /* SMARTLIST_FOREACH(va_times, int*, i, @@ -400,11 +353,11 @@ networkstatus_compute_consensus(smartlist_t *votes, smartlist_sort_strings(flags); smartlist_uniq_strings(flags); - smartlist_free(va_times); - smartlist_free(fu_times); - smartlist_free(vu_times); - smartlist_free(votesec_list); - smartlist_free(distsec_list); + tor_free(va_times); + tor_free(fu_times); + tor_free(vu_times); + tor_free(votesec_list); + tor_free(distsec_list); } chunks = smartlist_create(); diff --git a/src/or/or.h b/src/or/or.h index ddff2d55a5..a1d1132d26 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2932,8 +2932,6 @@ const char *dirvote_get_pending_detached_signatures(void); const cached_dir_t *dirvote_get_vote(const char *id); #ifdef DIRVOTE_PRIVATE -time_t median_time(smartlist_t *times); -int median_int(smartlist_t *times); int networkstatus_check_voter_signature(networkstatus_vote_t *consensus, networkstatus_voter_info_t *voter, authority_cert_t *cert); @@ -3184,6 +3182,7 @@ int rep_hist_load_mtbf_data(time_t now); time_t rep_hist_downrate_old_runs(time_t now); double rep_hist_get_stability(const char *id, time_t when); +double rep_hist_get_weighted_fractional_uptime(const char *id, time_t when); int rep_hist_have_measured_enough_stability(void); void rep_hist_note_used_port(uint16_t port, time_t now); diff --git a/src/or/test.c b/src/or/test.c index 8503cdb609..bb673f953a 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -2386,42 +2386,29 @@ test_same_voter(networkstatus_voter_info_t *v1, } static void -test_util_dirvote_helpers(void) +test_util_order_functions(void) { - smartlist_t *sl = smartlist_create(); - int a=12,b=24,c=25,d=60,e=77; - time_t v=99, w=150, x=700, y=1000, z=time(NULL); + int lst[25], n = 0; + // int a=12,b=24,c=25,d=60,e=77; - test_assert(y