Finish renaming digestset_contains to digestset_probably_contains

Since Bloom filters are probabilistic, it's nice to make it clear
that the "contains" operation can have false positives.
This commit is contained in:
Nick Mathewson 2018-06-26 13:20:54 -04:00
parent bf89278c79
commit ebbb0348dc
5 changed files with 14 additions and 14 deletions

View File

@ -26,7 +26,4 @@ void digestset_add(digestset_t *set, const char *addr);
int digestset_probably_contains(const digestset_t *set,
const char *addr);
// XXXX to remove.
#define digestset_contains digestset_probably_contains
#endif

View File

@ -1065,7 +1065,7 @@ get_eligible_guards(const or_options_t *options,
continue;
}
++n_guards;
if (digestset_contains(sampled_guard_ids, node->identity))
if (digestset_probably_contains(sampled_guard_ids, node->identity))
continue;
smartlist_add(eligible_guards, (node_t*)node);
} SMARTLIST_FOREACH_END(node);

View File

@ -4116,7 +4116,8 @@ routerlist_remove_old_cached_routers_with_id(time_t now,
signed_descriptor_t *r_next;
lifespans[i-lo].idx = i;
if (r->last_listed_as_valid_until >= now ||
(retain && digestset_contains(retain, r->signed_descriptor_digest))) {
(retain && digestset_probably_contains(retain,
r->signed_descriptor_digest))) {
must_keep[i-lo] = 1;
}
if (i < hi) {
@ -4211,7 +4212,7 @@ routerlist_remove_old_routers(void)
router = smartlist_get(routerlist->routers, i);
if (router->cache_info.published_on <= cutoff &&
router->cache_info.last_listed_as_valid_until < now &&
!digestset_contains(retain,
!digestset_probably_contains(retain,
router->cache_info.signed_descriptor_digest)) {
/* Too old: remove it. (If we're a cache, just move it into
* old_routers.) */
@ -4232,7 +4233,7 @@ routerlist_remove_old_routers(void)
sd = smartlist_get(routerlist->old_routers, i);
if (sd->published_on <= cutoff &&
sd->last_listed_as_valid_until < now &&
!digestset_contains(retain, sd->signed_descriptor_digest)) {
!digestset_probably_contains(retain, sd->signed_descriptor_digest)) {
/* Too old. Remove it. */
routerlist_remove_old(routerlist, sd, i--);
}

View File

@ -404,18 +404,20 @@ bench_dmap(void)
NANOCOUNT(pt3, pt4, iters*elts));
for (i = 0; i < iters; ++i) {
SMARTLIST_FOREACH(sl, const char *, cp, n += digestset_contains(ds, cp));
SMARTLIST_FOREACH(sl2, const char *, cp, n += digestset_contains(ds, cp));
SMARTLIST_FOREACH(sl, const char *, cp,
n += digestset_probably_contains(ds, cp));
SMARTLIST_FOREACH(sl2, const char *, cp,
n += digestset_probably_contains(ds, cp));
}
end = perftime();
printf("digestset_contains: %.2f ns per element.\n",
printf("digestset_probably_contains: %.2f ns per element.\n",
NANOCOUNT(pt4, end, iters*elts*2));
/* We need to use this, or else the whole loop gets optimized out. */
printf("Hits == %d\n", n);
for (i = 0; i < fpostests; ++i) {
crypto_rand(d, 20);
if (digestset_contains(ds, d)) ++fp;
if (digestset_probably_contains(ds, d)) ++fp;
}
printf("False positive rate on digestset: %.2f%%\n",
(fp/(double)fpostests)*100);

View File

@ -644,18 +644,18 @@ test_container_digestset(void *arg)
}
set = digestset_new(1000);
SMARTLIST_FOREACH(included, const char *, cp,
if (digestset_contains(set, cp))
if (digestset_probably_contains(set, cp))
ok = 0);
tt_assert(ok);
SMARTLIST_FOREACH(included, const char *, cp,
digestset_add(set, cp));
SMARTLIST_FOREACH(included, const char *, cp,
if (!digestset_contains(set, cp))
if (!digestset_probably_contains(set, cp))
ok = 0);
tt_assert(ok);
for (i = 0; i < 1000; ++i) {
crypto_rand(d, DIGEST_LEN);
if (digestset_contains(set, d))
if (digestset_probably_contains(set, d))
++false_positives;
}
tt_int_op(50, OP_GT, false_positives); /* Should be far lower. */