mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-27 22:03:31 +01:00
r19233@catbus: nickm | 2008-04-08 13:06:34 -0400
When we remove old routers, use Bloom filters rather than a digestmap-based set in order to tell which ones we absolutely need to keep. This will save us roughly a kazillion little short-lived allocations for hash table entries. svn:r14318
This commit is contained in:
parent
0c9efd6a1e
commit
a627407fcb
@ -51,6 +51,10 @@ Changes in version 0.2.1.1-alpha - 2008-??-??
|
||||
- New --hush command-line option similar to --quiet. While --quiet
|
||||
disables all logging to the console on startup, --hush limits the
|
||||
output to messages of warning and error severity.
|
||||
- Use a Bloom filter rather than a digest-based set to track which
|
||||
descriptors we need to keep around when we're cleaning out old
|
||||
router descriptors. This speeds up the computation significantly, and
|
||||
may reduce fragmentation.
|
||||
|
||||
o Code simplifications and refactoring:
|
||||
- Refactor code using connection_ap_handshake_attach_circuit() to
|
||||
|
@ -1192,3 +1192,23 @@ IMPLEMENT_ORDER_FUNC(find_nth_double, double)
|
||||
IMPLEMENT_ORDER_FUNC(find_nth_uint32, uint32_t)
|
||||
IMPLEMENT_ORDER_FUNC(find_nth_long, long)
|
||||
|
||||
/** Return a newly allocated digestset_t, optimized to hold a total of
|
||||
* <b>max_elements</b> digests with a reasonably low false positive weight. */
|
||||
digestset_t *
|
||||
digestset_new(int max_elements)
|
||||
{
|
||||
int n_bits = 1u << (tor_log2(max_elements)+5);
|
||||
digestset_t *r = tor_malloc(sizeof(digestset_t));
|
||||
r->mask = n_bits - 1;
|
||||
r->ba = bitarray_init_zero(n_bits);
|
||||
return r;
|
||||
}
|
||||
|
||||
/** Free all storage held in <b>set</b>. */
|
||||
void
|
||||
digestset_free(digestset_t *set)
|
||||
{
|
||||
bitarray_free(set->ba);
|
||||
tor_free(set);
|
||||
}
|
||||
|
||||
|
@ -564,6 +564,49 @@ bitarray_is_set(bitarray_t *b, int bit)
|
||||
return b[bit >> BITARRAY_SHIFT] & (1u << (bit & BITARRAY_MASK));
|
||||
}
|
||||
|
||||
/** A set of digests, implemented as a Bloom filter. */
|
||||
typedef struct {
|
||||
int mask; /* One less than the number of bits in <b>ba</b>; always one less
|
||||
* than a power of two. */
|
||||
bitarray_t *ba; /* A bit array to implement the Bloom filter. */
|
||||
} digestset_t;
|
||||
|
||||
#define BIT(n) ((n) & set->mask)
|
||||
/** Add the digest <b>digest</b> to <b>set</b>. */
|
||||
static INLINE void
|
||||
digestset_add(digestset_t *set, const char *digest)
|
||||
{
|
||||
const uint32_t *p = (const uint32_t *)digest;
|
||||
const uint32_t d1 = p[0] + (p[1]>>16);
|
||||
const uint32_t d2 = p[1] + (p[2]>>16);
|
||||
const uint32_t d3 = p[2] + (p[3]>>16);
|
||||
const uint32_t d4 = p[3] + (p[0]>>16);
|
||||
bitarray_set(set->ba, BIT(d1));
|
||||
bitarray_set(set->ba, BIT(d2));
|
||||
bitarray_set(set->ba, BIT(d3));
|
||||
bitarray_set(set->ba, BIT(d4));
|
||||
}
|
||||
|
||||
/** If <b>digest</b> is in <b>set</b>, return nonzero. Otherwise,
|
||||
* <em>probably</em> return zero. */
|
||||
static INLINE int
|
||||
digestset_isin(const digestset_t *set, const char *digest)
|
||||
{
|
||||
const uint32_t *p = (const uint32_t *)digest;
|
||||
const uint32_t d1 = p[0] + (p[1]>>16);
|
||||
const uint32_t d2 = p[1] + (p[2]>>16);
|
||||
const uint32_t d3 = p[2] + (p[3]>>16);
|
||||
const uint32_t d4 = p[3] + (p[0]>>16);
|
||||
return bitarray_is_set(set->ba, BIT(d1)) &&
|
||||
bitarray_is_set(set->ba, BIT(d2)) &&
|
||||
bitarray_is_set(set->ba, BIT(d3)) &&
|
||||
bitarray_is_set(set->ba, BIT(d4));
|
||||
}
|
||||
#undef BIT
|
||||
|
||||
digestset_t *digestset_new(int max_elements);
|
||||
void digestset_free(digestset_t* set);
|
||||
|
||||
/* These functions, given an <b>array</b> of <b>n_elements</b>, return the
|
||||
* <b>nth</b> lowest element. <b>nth</b>=0 gives the lowest element;
|
||||
* <b>n_elements</b>-1 gives the highest; and (<b>n_elements</b>-1) / 2 gives
|
||||
|
@ -2957,7 +2957,7 @@ _compare_duration_idx(const void *_d1, const void *_d2)
|
||||
static void
|
||||
routerlist_remove_old_cached_routers_with_id(time_t now,
|
||||
time_t cutoff, int lo, int hi,
|
||||
digestmap_t *retain)
|
||||
digestset_t *retain)
|
||||
{
|
||||
int i, n = hi-lo+1;
|
||||
unsigned n_extra, n_rmv = 0;
|
||||
@ -2974,10 +2974,9 @@ routerlist_remove_old_cached_routers_with_id(time_t now,
|
||||
tor_assert(!memcmp(ident, r->identity_digest, DIGEST_LEN));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Check whether we need to do anything at all. */
|
||||
{
|
||||
int mdpr = directory_caches_dir_info(get_options()) ? 5 : 2;
|
||||
int mdpr = directory_caches_dir_info(get_options()) ? 2 : 1;
|
||||
if (n <= mdpr)
|
||||
return;
|
||||
n_extra = n - mdpr;
|
||||
@ -2993,7 +2992,7 @@ routerlist_remove_old_cached_routers_with_id(time_t now,
|
||||
signed_descriptor_t *r_next;
|
||||
lifespans[i-lo].idx = i;
|
||||
if (r->last_listed_as_valid_until >= now ||
|
||||
(retain && digestmap_get(retain, r->signed_descriptor_digest))) {
|
||||
(retain && digestset_isin(retain, r->signed_descriptor_digest))) {
|
||||
must_keep[i-lo] = 1;
|
||||
}
|
||||
if (i < hi) {
|
||||
@ -3049,10 +3048,11 @@ routerlist_remove_old_routers(void)
|
||||
time_t cutoff;
|
||||
routerinfo_t *router;
|
||||
signed_descriptor_t *sd;
|
||||
digestmap_t *retain;
|
||||
digestset_t *retain;
|
||||
int caches = directory_caches_dir_info(get_options());
|
||||
const networkstatus_t *consensus = networkstatus_get_latest_consensus();
|
||||
const smartlist_t *networkstatus_v2_list = networkstatus_get_v2_list();
|
||||
int n_expected_retain = 0;
|
||||
|
||||
trusted_dirs_remove_old_certs();
|
||||
|
||||
@ -3061,7 +3061,18 @@ routerlist_remove_old_routers(void)
|
||||
|
||||
// routerlist_assert_ok(routerlist);
|
||||
|
||||
retain = digestmap_new();
|
||||
n_expected_retain = smartlist_len(consensus->routerstatus_list);
|
||||
if (caches &&
|
||||
networkstatus_v2_list && smartlist_len(networkstatus_v2_list)) {
|
||||
SMARTLIST_FOREACH(networkstatus_v2_list, networkstatus_v2_t *, ns,
|
||||
n_expected_retain += smartlist_len(ns->entries));
|
||||
/*XXXX021 too much magic. */
|
||||
n_expected_retain /= (smartlist_len(networkstatus_v2_list)/2+1);
|
||||
}
|
||||
//log_notice(LD_DIR,"n_expected_retain=%d",n_expected_retain);
|
||||
|
||||
retain = digestset_new(n_expected_retain);
|
||||
|
||||
cutoff = now - OLD_ROUTER_DESC_MAX_AGE;
|
||||
/* Build a list of all the descriptors that _anybody_ lists. */
|
||||
if (caches) {
|
||||
@ -3077,7 +3088,7 @@ routerlist_remove_old_routers(void)
|
||||
* system will obsolete this whole thing in 0.2.0.x. */
|
||||
SMARTLIST_FOREACH(ns->entries, routerstatus_t *, rs,
|
||||
if (rs->published_on >= cutoff)
|
||||
digestmap_set(retain, rs->descriptor_digest, (void*)1));
|
||||
digestset_add(retain, rs->descriptor_digest));
|
||||
});
|
||||
}
|
||||
|
||||
@ -3085,13 +3096,13 @@ routerlist_remove_old_routers(void)
|
||||
if (consensus) {
|
||||
SMARTLIST_FOREACH(consensus->routerstatus_list, routerstatus_t *, rs,
|
||||
if (rs->published_on >= cutoff)
|
||||
digestmap_set(retain, rs->descriptor_digest, (void*)1));
|
||||
digestset_add(retain, rs->descriptor_digest));
|
||||
}
|
||||
|
||||
/* If we have a bunch of networkstatuses, we should consider pruning current
|
||||
* routers that are too old and that nobody recommends. (If we don't have
|
||||
* enough networkstatuses, then we should get more before we decide to kill
|
||||
* routers.) */
|
||||
/* If we have nearly as many networkstatuses as we want, we should consider
|
||||
* pruning current routers that are too old and that nobody recommends. (If
|
||||
* we don't have enough networkstatuses, then we should get more before we
|
||||
* decide to kill routers.) */
|
||||
if (!caches ||
|
||||
smartlist_len(networkstatus_v2_list) > get_n_v2_authorities() / 2) {
|
||||
cutoff = now - ROUTER_MAX_AGE;
|
||||
@ -3100,7 +3111,8 @@ routerlist_remove_old_routers(void)
|
||||
router = smartlist_get(routerlist->routers, i);
|
||||
if (router->cache_info.published_on <= cutoff &&
|
||||
router->cache_info.last_listed_as_valid_until < now &&
|
||||
!digestmap_get(retain,router->cache_info.signed_descriptor_digest)) {
|
||||
!digestset_isin(retain,
|
||||
router->cache_info.signed_descriptor_digest)) {
|
||||
/* Too old: remove it. (If we're a cache, just move it into
|
||||
* old_routers.) */
|
||||
log_info(LD_DIR,
|
||||
@ -3120,7 +3132,7 @@ routerlist_remove_old_routers(void)
|
||||
sd = smartlist_get(routerlist->old_routers, i);
|
||||
if (sd->published_on <= cutoff &&
|
||||
sd->last_listed_as_valid_until < now &&
|
||||
!digestmap_get(retain, sd->signed_descriptor_digest)) {
|
||||
!digestset_isin(retain, sd->signed_descriptor_digest)) {
|
||||
/* Too old. Remove it. */
|
||||
routerlist_remove_old(routerlist, sd, i--);
|
||||
}
|
||||
@ -3128,11 +3140,9 @@ routerlist_remove_old_routers(void)
|
||||
|
||||
//routerlist_assert_ok(routerlist);
|
||||
|
||||
log_info(LD_DIR, "We have %d live routers and %d old router descriptors. "
|
||||
"At most %d must be retained because of networkstatuses.",
|
||||
log_info(LD_DIR, "We have %d live routers and %d old router descriptors.",
|
||||
smartlist_len(routerlist->routers),
|
||||
smartlist_len(routerlist->old_routers),
|
||||
digestmap_size(retain));
|
||||
smartlist_len(routerlist->old_routers));
|
||||
|
||||
/* Now we might have to look at routerlist->old_routers for extraneous
|
||||
* members. (We'd keep all the members if we could, but we need to save
|
||||
@ -3141,9 +3151,10 @@ routerlist_remove_old_routers(void)
|
||||
* total number doesn't approach max_descriptors_per_router()*len(router).
|
||||
*/
|
||||
if (smartlist_len(routerlist->old_routers) <
|
||||
smartlist_len(routerlist->routers) * (caches?4:2))
|
||||
smartlist_len(routerlist->routers))
|
||||
goto done;
|
||||
|
||||
/* Sort by identity, then fix indices. */
|
||||
smartlist_sort(routerlist->old_routers, _compare_old_routers_by_identity);
|
||||
/* Fix indices. */
|
||||
for (i = 0; i < smartlist_len(routerlist->old_routers); ++i) {
|
||||
@ -3171,7 +3182,7 @@ routerlist_remove_old_routers(void)
|
||||
//routerlist_assert_ok(routerlist);
|
||||
|
||||
done:
|
||||
digestmap_free(retain, NULL);
|
||||
digestset_free(retain);
|
||||
}
|
||||
|
||||
/** We just added a new set of descriptors. Take whatever extra steps
|
||||
|
103
src/or/test.c
103
src/or/test.c
@ -1922,6 +1922,40 @@ test_util_bitarray(void)
|
||||
bitarray_free(ba);
|
||||
}
|
||||
|
||||
static void
|
||||
test_util_digestset(void)
|
||||
{
|
||||
smartlist_t *included = smartlist_create();
|
||||
char d[DIGEST_LEN];
|
||||
int i;
|
||||
int ok = 1;
|
||||
int false_positives = 0;
|
||||
digestset_t *set;
|
||||
|
||||
for (i = 0; i < 1000; ++i) {
|
||||
crypto_rand(d, DIGEST_LEN);
|
||||
smartlist_add(included, tor_memdup(d, DIGEST_LEN));
|
||||
}
|
||||
set = digestset_new(1000);
|
||||
SMARTLIST_FOREACH(included, const char *, cp,
|
||||
if (digestset_isin(set, cp))
|
||||
ok = 0);
|
||||
test_assert(ok);
|
||||
SMARTLIST_FOREACH(included, const char *, cp,
|
||||
digestset_add(set, cp));
|
||||
SMARTLIST_FOREACH(included, const char *, cp,
|
||||
if (!digestset_isin(set, cp))
|
||||
ok = 0);
|
||||
test_assert(ok);
|
||||
for (i = 0; i < 1000; ++i) {
|
||||
crypto_rand(d, DIGEST_LEN);
|
||||
if (digestset_isin(set, d))
|
||||
++false_positives;
|
||||
}
|
||||
test_assert(false_positives < 50); /* Should be far lower. */
|
||||
digestset_free(set);
|
||||
}
|
||||
|
||||
/* stop threads running at once. */
|
||||
static tor_mutex_t *_thread_test_mutex = NULL;
|
||||
/* make sure that threads have to run at the same time. */
|
||||
@ -3361,6 +3395,69 @@ bench_aes(void)
|
||||
crypto_free_cipher_env(c);
|
||||
}
|
||||
|
||||
static void
|
||||
bench_dmap(void)
|
||||
{
|
||||
smartlist_t *sl = smartlist_create();
|
||||
smartlist_t *sl2 = smartlist_create();
|
||||
struct timeval start, end, pt2, pt3, pt4;
|
||||
const int iters = 10000;
|
||||
const int elts = 4000;
|
||||
const int fpostests = 1000000;
|
||||
char d[20];
|
||||
int i,n=0, fp = 0;
|
||||
digestmap_t *dm = digestmap_new();
|
||||
digestset_t *ds = digestset_new(elts);
|
||||
|
||||
for (i = 0; i < elts; ++i) {
|
||||
crypto_rand(d, 20);
|
||||
smartlist_add(sl, tor_memdup(d, 20));
|
||||
}
|
||||
for (i = 0; i < elts; ++i) {
|
||||
crypto_rand(d, 20);
|
||||
smartlist_add(sl2, tor_memdup(d, 20));
|
||||
}
|
||||
printf("nbits=%d\n", ds->mask+1);
|
||||
|
||||
tor_gettimeofday(&start);
|
||||
for (i = 0; i < iters; ++i) {
|
||||
SMARTLIST_FOREACH(sl, const char *, cp, digestmap_set(dm, cp, (void*)1));
|
||||
}
|
||||
tor_gettimeofday(&pt2);
|
||||
for (i = 0; i < iters; ++i) {
|
||||
SMARTLIST_FOREACH(sl, const char *, cp, digestmap_get(dm, cp));
|
||||
SMARTLIST_FOREACH(sl2, const char *, cp, digestmap_get(dm, cp));
|
||||
}
|
||||
tor_gettimeofday(&pt3);
|
||||
for (i = 0; i < iters; ++i) {
|
||||
SMARTLIST_FOREACH(sl, const char *, cp, digestset_add(ds, cp));
|
||||
}
|
||||
tor_gettimeofday(&pt4);
|
||||
for (i = 0; i < iters; ++i) {
|
||||
SMARTLIST_FOREACH(sl, const char *, cp, n += digestset_isin(ds, cp));
|
||||
SMARTLIST_FOREACH(sl2, const char *, cp, n += digestset_isin(ds, cp));
|
||||
}
|
||||
tor_gettimeofday(&end);
|
||||
|
||||
for (i = 0; i < fpostests; ++i) {
|
||||
crypto_rand(d, 20);
|
||||
if (digestset_isin(ds, d)) ++fp;
|
||||
}
|
||||
|
||||
printf("%ld\n",(unsigned long)tv_udiff(&start, &pt2));
|
||||
printf("%ld\n",(unsigned long)tv_udiff(&pt2, &pt3));
|
||||
printf("%ld\n",(unsigned long)tv_udiff(&pt3, &pt4));
|
||||
printf("%ld\n",(unsigned long)tv_udiff(&pt4, &end));
|
||||
printf("-- %d\n", n);
|
||||
printf("++ %f\n", fp/(double)fpostests);
|
||||
digestmap_free(dm, NULL);
|
||||
digestset_free(ds);
|
||||
SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
|
||||
SMARTLIST_FOREACH(sl2, char *, cp, tor_free(cp));
|
||||
smartlist_free(sl);
|
||||
smartlist_free(sl2);
|
||||
}
|
||||
|
||||
static void
|
||||
test_util_mempool(void)
|
||||
{
|
||||
@ -3850,6 +3947,7 @@ static struct {
|
||||
SUBENT(util, datadir),
|
||||
SUBENT(util, smartlist),
|
||||
SUBENT(util, bitarray),
|
||||
SUBENT(util, digestset),
|
||||
SUBENT(util, mempool),
|
||||
SUBENT(util, memarea),
|
||||
SUBENT(util, strmap),
|
||||
@ -3960,6 +4058,11 @@ main(int c, char**v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (0) {
|
||||
bench_dmap();
|
||||
return 0;
|
||||
}
|
||||
|
||||
atexit(remove_directory);
|
||||
|
||||
printf("Running Tor unit tests on %s\n", get_uname());
|
||||
|
Loading…
Reference in New Issue
Block a user