mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-24 04:13:28 +01:00
Refactor bloom filter logic not to be digest-specific.
Now the address-set code and the digest-set code share the same backend. Closes ticket 26510
This commit is contained in:
parent
860b9a9918
commit
bf89278c79
4
changes/ticket26510
Normal file
4
changes/ticket26510
Normal file
@ -0,0 +1,4 @@
|
||||
o Code simplification and refactoring:
|
||||
- Unify our bloom filter logic. Previously we had two copies of this
|
||||
code: one for routerlist filtering, and one for address set
|
||||
calculations. Closes ticket 26510.
|
@ -14,35 +14,19 @@
|
||||
#include "common/address_set.h"
|
||||
#include "common/address.h"
|
||||
#include "common/compat.h"
|
||||
#include "lib/container/bitarray.h"
|
||||
#include "lib/container/bloomfilt.h"
|
||||
#include "lib/crypt_ops/crypto_rand.h"
|
||||
#include "common/util.h"
|
||||
#include "siphash.h"
|
||||
|
||||
/** How many 64-bit siphash values to extract per address */
|
||||
#define N_HASHES 2
|
||||
/** How many bloom-filter bits we set per address. This is twice the N_HASHES
|
||||
* value, since we split the siphash output into two 32-bit values. */
|
||||
#define N_BITS_PER_ITEM (N_HASHES * 2)
|
||||
|
||||
/* XXXX This code is largely duplicated with digestset_t. We should merge
|
||||
* them together into a common bloom-filter implementation. I'm keeping
|
||||
* them separate for now, though, since this module needs to be backported
|
||||
* all the way to 0.2.9.
|
||||
*
|
||||
* The main difference between digestset_t and this code is that we use
|
||||
* independent siphashes rather than messing around with bit-shifts. The
|
||||
* approach here is probably more sound, and we should prefer it if&when we
|
||||
* unify the implementations.
|
||||
*/
|
||||
|
||||
struct address_set_t {
|
||||
/** siphash keys to make N_HASHES independent hashes for each address. */
|
||||
struct sipkey key[N_HASHES];
|
||||
int mask; /**< One less than the number of bits in <b>ba</b>; always one less
|
||||
* than a power of two. */
|
||||
bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
|
||||
};
|
||||
/* Wrap our hash function to have the signature that the bloom filter
|
||||
* needs. */
|
||||
static uint64_t
|
||||
bloomfilt_addr_hash(const struct sipkey *key,
|
||||
const void *item)
|
||||
{
|
||||
return tor_addr_keyed_hash(key, item);
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate and return an address_set, suitable for holding up to
|
||||
@ -51,33 +35,11 @@ struct address_set_t {
|
||||
address_set_t *
|
||||
address_set_new(int max_addresses_guess)
|
||||
{
|
||||
/* See digestset_new() for rationale on this equation. */
|
||||
int n_bits = 1u << (tor_log2(max_addresses_guess)+5);
|
||||
|
||||
address_set_t *set = tor_malloc_zero(sizeof(address_set_t));
|
||||
set->mask = n_bits - 1;
|
||||
set->ba = bitarray_init_zero(n_bits);
|
||||
crypto_rand((char*) set->key, sizeof(set->key));
|
||||
|
||||
return set;
|
||||
uint8_t k[BLOOMFILT_KEY_LEN];
|
||||
crypto_rand((void*)k, sizeof(k));
|
||||
return bloomfilt_new(max_addresses_guess, bloomfilt_addr_hash, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* Release all storage associated with <b>set</b>.
|
||||
*/
|
||||
void
|
||||
address_set_free(address_set_t *set)
|
||||
{
|
||||
if (! set)
|
||||
return;
|
||||
|
||||
bitarray_free(set->ba);
|
||||
tor_free(set);
|
||||
}
|
||||
|
||||
/** Yield the bit index corresponding to 'val' for set. */
|
||||
#define BIT(set, val) ((val) & (set)->mask)
|
||||
|
||||
/**
|
||||
* Add <b>addr</b> to <b>set</b>.
|
||||
*
|
||||
@ -87,14 +49,7 @@ address_set_free(address_set_t *set)
|
||||
void
|
||||
address_set_add(address_set_t *set, const struct tor_addr_t *addr)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N_HASHES; ++i) {
|
||||
uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
|
||||
uint32_t high_bits = (uint32_t)(h >> 32);
|
||||
uint32_t low_bits = (uint32_t)(h);
|
||||
bitarray_set(set->ba, BIT(set, high_bits));
|
||||
bitarray_set(set->ba, BIT(set, low_bits));
|
||||
}
|
||||
bloomfilt_add(set, addr);
|
||||
}
|
||||
|
||||
/** As address_set_add(), but take an ipv4 address in host order. */
|
||||
@ -111,18 +66,8 @@ address_set_add_ipv4h(address_set_t *set, uint32_t addr)
|
||||
* return false if <b>addr</b> is not a member of set.)
|
||||
*/
|
||||
int
|
||||
address_set_probably_contains(address_set_t *set,
|
||||
address_set_probably_contains(const address_set_t *set,
|
||||
const struct tor_addr_t *addr)
|
||||
{
|
||||
int i, matches = 0;
|
||||
for (i = 0; i < N_HASHES; ++i) {
|
||||
uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
|
||||
uint32_t high_bits = (uint32_t)(h >> 32);
|
||||
uint32_t low_bits = (uint32_t)(h);
|
||||
// Note that !! is necessary here, since bitarray_is_set does not
|
||||
// necessarily return 1 on true.
|
||||
matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
|
||||
matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
|
||||
}
|
||||
return matches == N_BITS_PER_ITEM;
|
||||
return bloomfilt_probably_contains(set, addr);
|
||||
}
|
||||
|
@ -4,10 +4,6 @@
|
||||
/**
|
||||
* \file address_set.h
|
||||
* \brief Types to handle sets of addresses.
|
||||
*
|
||||
* This module was first written on a semi-emergency basis to improve the
|
||||
* robustness of the anti-DoS module. As such, it's written in a pretty
|
||||
* conservative way, and should be susceptible to improvement later on.
|
||||
**/
|
||||
|
||||
#ifndef TOR_ADDRESS_SET_H
|
||||
@ -15,21 +11,21 @@
|
||||
|
||||
#include "orconfig.h"
|
||||
#include "lib/cc/torint.h"
|
||||
#include "lib/container/bloomfilt.h"
|
||||
|
||||
/**
|
||||
* An address_set_t represents a set of tor_addr_t values. The implementation
|
||||
* is probabilistic: false negatives cannot occur but false positives are
|
||||
* possible.
|
||||
*/
|
||||
typedef struct address_set_t address_set_t;
|
||||
typedef struct bloomfilt_t address_set_t;
|
||||
struct tor_addr_t;
|
||||
|
||||
address_set_t *address_set_new(int max_addresses_guess);
|
||||
void address_set_free(address_set_t *set);
|
||||
#define address_set_free(set) bloomfilt_free(set)
|
||||
void address_set_add(address_set_t *set, const struct tor_addr_t *addr);
|
||||
void address_set_add_ipv4h(address_set_t *set, uint32_t addr);
|
||||
int address_set_probably_contains(address_set_t *set,
|
||||
int address_set_probably_contains(const address_set_t *set,
|
||||
const struct tor_addr_t *addr);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -9,14 +9,75 @@
|
||||
**/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "lib/malloc/util_malloc.h"
|
||||
#include "lib/container/bloomfilt.h"
|
||||
#include "lib/intmath/bits.h"
|
||||
#include "lib/log/util_bug.h"
|
||||
#include "siphash.h"
|
||||
|
||||
/** Return a newly allocated digestset_t, optimized to hold a total of
|
||||
* <b>max_elements</b> digests with a reasonably low false positive weight. */
|
||||
digestset_t *
|
||||
digestset_new(int max_elements)
|
||||
/** How many bloom-filter bits we set per address. This is twice the
|
||||
* BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit
|
||||
* values. */
|
||||
#define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2)
|
||||
|
||||
struct bloomfilt_t {
|
||||
/** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each
|
||||
* items. */
|
||||
struct sipkey key[BLOOMFILT_N_HASHES];
|
||||
bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */
|
||||
uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always
|
||||
* one less than a power of two. */
|
||||
bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
|
||||
};
|
||||
|
||||
#define BIT(set, n) ((n) & (set)->mask)
|
||||
|
||||
/** Add the element <b>item</b> to <b>set</b>. */
|
||||
void
|
||||
bloomfilt_add(bloomfilt_t *set,
|
||||
const void *item)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
|
||||
uint64_t h = set->hashfn(&set->key[i], item);
|
||||
uint32_t high_bits = (uint32_t)(h >> 32);
|
||||
uint32_t low_bits = (uint32_t)(h);
|
||||
bitarray_set(set->ba, BIT(set, high_bits));
|
||||
bitarray_set(set->ba, BIT(set, low_bits));
|
||||
}
|
||||
}
|
||||
|
||||
/** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise,
|
||||
* <em>probably</em> return zero. */
|
||||
int
|
||||
bloomfilt_probably_contains(const bloomfilt_t *set,
|
||||
const void *item)
|
||||
{
|
||||
int i, matches = 0;
|
||||
for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
|
||||
uint64_t h = set->hashfn(&set->key[i], item);
|
||||
uint32_t high_bits = (uint32_t)(h >> 32);
|
||||
uint32_t low_bits = (uint32_t)(h);
|
||||
// Note that !! is necessary here, since bitarray_is_set does not
|
||||
// necessarily return 1 on true.
|
||||
matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
|
||||
matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
|
||||
}
|
||||
return matches == N_BITS_PER_ITEM;
|
||||
}
|
||||
|
||||
/** Return a newly allocated bloomfilt_t, optimized to hold a total of
|
||||
* <b>max_elements</b> elements with a reasonably low false positive weight.
|
||||
*
|
||||
* Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide
|
||||
* functions of the items, and the key material <b>random_key</b> to
|
||||
* key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key.
|
||||
**/
|
||||
bloomfilt_t *
|
||||
bloomfilt_new(int max_elements,
|
||||
bloomfilt_hash_fn hashfn,
|
||||
const uint8_t *random_key)
|
||||
{
|
||||
/* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
|
||||
* is the number of hash functions per entry, m is the bits in the array,
|
||||
@ -29,15 +90,21 @@ digestset_new(int max_elements)
|
||||
* conserve CPU, and k==13 is pretty big.
|
||||
*/
|
||||
int n_bits = 1u << (tor_log2(max_elements)+5);
|
||||
digestset_t *r = tor_malloc(sizeof(digestset_t));
|
||||
bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t));
|
||||
r->mask = n_bits - 1;
|
||||
r->ba = bitarray_init_zero(n_bits);
|
||||
|
||||
tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN);
|
||||
memcpy(r->key, random_key, sizeof(r->key));
|
||||
|
||||
r->hashfn = hashfn;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/** Free all storage held in <b>set</b>. */
|
||||
void
|
||||
digestset_free_(digestset_t *set)
|
||||
bloomfilt_free_(bloomfilt_t *set)
|
||||
{
|
||||
if (!set)
|
||||
return;
|
||||
|
@ -9,50 +9,27 @@
|
||||
#include "orconfig.h"
|
||||
#include "lib/cc/torint.h"
|
||||
#include "lib/container/bitarray.h"
|
||||
#include "siphash.h"
|
||||
|
||||
/** A set of digests, implemented as a Bloom filter. */
|
||||
typedef struct {
|
||||
int mask; /**< One less than the number of bits in <b>ba</b>; always one less
|
||||
* than a power of two. */
|
||||
bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
|
||||
} digestset_t;
|
||||
/** A set of elements, implemented as a Bloom filter. */
|
||||
typedef struct bloomfilt_t bloomfilt_t;
|
||||
|
||||
#define BIT(n) ((n) & set->mask)
|
||||
/** Add the digest <b>digest</b> to <b>set</b>. */
|
||||
static inline void
|
||||
digestset_add(digestset_t *set, const char *digest)
|
||||
{
|
||||
const uint64_t x = siphash24g(digest, 20);
|
||||
const uint32_t d1 = (uint32_t) x;
|
||||
const uint32_t d2 = (uint32_t)( (x>>16) + x);
|
||||
const uint32_t d3 = (uint32_t)( (x>>32) + x);
|
||||
const uint32_t d4 = (uint32_t)( (x>>48) + x);
|
||||
bitarray_set(set->ba, BIT(d1));
|
||||
bitarray_set(set->ba, BIT(d2));
|
||||
bitarray_set(set->ba, BIT(d3));
|
||||
bitarray_set(set->ba, BIT(d4));
|
||||
}
|
||||
/** How many 64-bit siphash values to extract per item. */
|
||||
#define BLOOMFILT_N_HASHES 2
|
||||
|
||||
/** If <b>digest</b> is in <b>set</b>, return nonzero. Otherwise,
|
||||
* <em>probably</em> return zero. */
|
||||
static inline int
|
||||
digestset_contains(const digestset_t *set, const char *digest)
|
||||
{
|
||||
const uint64_t x = siphash24g(digest, 20);
|
||||
const uint32_t d1 = (uint32_t) x;
|
||||
const uint32_t d2 = (uint32_t)( (x>>16) + x);
|
||||
const uint32_t d3 = (uint32_t)( (x>>32) + x);
|
||||
const uint32_t d4 = (uint32_t)( (x>>48) + x);
|
||||
return bitarray_is_set(set->ba, BIT(d1)) &&
|
||||
bitarray_is_set(set->ba, BIT(d2)) &&
|
||||
bitarray_is_set(set->ba, BIT(d3)) &&
|
||||
bitarray_is_set(set->ba, BIT(d4));
|
||||
}
|
||||
#undef BIT
|
||||
/** How much key material do we need to randomize hashes? */
|
||||
#define BLOOMFILT_KEY_LEN (BLOOMFILT_N_HASHES * 16)
|
||||
|
||||
digestset_t *digestset_new(int max_elements);
|
||||
void digestset_free_(digestset_t* set);
|
||||
#define digestset_free(set) FREE_AND_NULL(digestset_t, digestset_free_, (set))
|
||||
struct sipkey;
|
||||
typedef uint64_t (*bloomfilt_hash_fn)(const struct sipkey *key,
|
||||
const void *item);
|
||||
|
||||
#endif /* !defined(TOR_CONTAINER_H) */
|
||||
void bloomfilt_add(bloomfilt_t *set, const void *item);
|
||||
int bloomfilt_probably_contains(const bloomfilt_t *set, const void *item);
|
||||
|
||||
bloomfilt_t *bloomfilt_new(int max_elements,
|
||||
bloomfilt_hash_fn hashfn,
|
||||
const uint8_t *random_key);
|
||||
void bloomfilt_free_(bloomfilt_t* set);
|
||||
#define bloomfilt_free(set) FREE_AND_NULL(bloomfilt_t, bloomfilt_free_, (set))
|
||||
|
||||
#endif /* !defined(TOR_BLOOMFILT_H) */
|
||||
|
58
src/lib/crypt_ops/digestset.c
Normal file
58
src/lib/crypt_ops/digestset.c
Normal file
@ -0,0 +1,58 @@
|
||||
/* Copyright (c) 2018-2018, The Tor Project, Inc. */
|
||||
/* See LICENSE for licensing information */
|
||||
|
||||
/**
|
||||
* \file digestset.c
|
||||
* \brief Implementation for a set of digests
|
||||
**/
|
||||
|
||||
#include "orconfig.h"
|
||||
#include "lib/container/bloomfilt.h"
|
||||
#include "lib/crypt_ops/crypto_rand.h"
|
||||
#include "lib/defs/digest_sizes.h"
|
||||
#include "lib/crypt_ops/digestset.h"
|
||||
#include "siphash.h"
|
||||
|
||||
/* Wrap our hash function to have the signature that the bloom filter
|
||||
* needs. */
|
||||
static uint64_t
|
||||
bloomfilt_digest_hash(const struct sipkey *key,
|
||||
const void *item)
|
||||
{
|
||||
return siphash24(item, DIGEST_LEN, key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate and return an digestset, suitable for holding up to
|
||||
* <b>max_guess</b> distinct values.
|
||||
*/
|
||||
digestset_t *
|
||||
digestset_new(int max_guess)
|
||||
{
|
||||
uint8_t k[BLOOMFILT_KEY_LEN];
|
||||
crypto_rand((void*)k, sizeof(k));
|
||||
return bloomfilt_new(max_guess, bloomfilt_digest_hash, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add <b>digest</b> to <b>set</b>.
|
||||
*
|
||||
* All future queries for <b>digest</b> in set will return true. Removing
|
||||
* items is not possible.
|
||||
*/
|
||||
void
|
||||
digestset_add(digestset_t *set, const char *digest)
|
||||
{
|
||||
bloomfilt_add(set, digest);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if <b>digest</b> is a member of <b>set</b>. (And probably,
|
||||
* return false if <b>digest</b> is not a member of set.)
|
||||
*/
|
||||
int
|
||||
digestset_probably_contains(const digestset_t *set,
|
||||
const char *digest)
|
||||
{
|
||||
return bloomfilt_probably_contains(set, digest);
|
||||
}
|
32
src/lib/crypt_ops/digestset.h
Normal file
32
src/lib/crypt_ops/digestset.h
Normal file
@ -0,0 +1,32 @@
|
||||
/* Copyright (c) 2018, The Tor Project, Inc. */
|
||||
/* See LICENSE for licensing information */
|
||||
|
||||
/**
|
||||
* \file digestset.h
|
||||
* \brief Types to handle sets of digests, based on bloom filters.
|
||||
**/
|
||||
|
||||
#ifndef TOR_DIGESTSET_H
|
||||
#define TOR_DIGESTSET_H
|
||||
|
||||
#include "orconfig.h"
|
||||
#include "lib/cc/torint.h"
|
||||
#include "lib/container/bloomfilt.h"
|
||||
|
||||
/**
|
||||
* An digestset_t represents a set of 20-byte digest values. The
|
||||
* implementation is probabilistic: false negatives cannot occur but false
|
||||
* positives are possible.
|
||||
*/
|
||||
typedef struct bloomfilt_t digestset_t;
|
||||
|
||||
digestset_t *digestset_new(int max_addresses_guess);
|
||||
#define digestset_free(set) bloomfilt_free(set)
|
||||
void digestset_add(digestset_t *set, const char *addr);
|
||||
int digestset_probably_contains(const digestset_t *set,
|
||||
const char *addr);
|
||||
|
||||
// XXXX to remove.
|
||||
#define digestset_contains digestset_probably_contains
|
||||
|
||||
#endif
|
@ -19,7 +19,8 @@ src_lib_libtor_crypt_ops_a_SOURCES = \
|
||||
src/lib/crypt_ops/crypto_rand.c \
|
||||
src/lib/crypt_ops/crypto_rsa.c \
|
||||
src/lib/crypt_ops/crypto_s2k.c \
|
||||
src/lib/crypt_ops/crypto_util.c
|
||||
src/lib/crypt_ops/crypto_util.c \
|
||||
src/lib/crypt_ops/digestset.c
|
||||
|
||||
src_lib_libtor_crypt_ops_testing_a_SOURCES = \
|
||||
$(src_lib_libtor_crypt_ops_a_SOURCES)
|
||||
@ -41,4 +42,5 @@ noinst_HEADERS += \
|
||||
src/lib/crypt_ops/crypto_rand.h \
|
||||
src/lib/crypt_ops/crypto_rsa.h \
|
||||
src/lib/crypt_ops/crypto_s2k.h \
|
||||
src/lib/crypt_ops/crypto_util.h
|
||||
src/lib/crypt_ops/crypto_util.h \
|
||||
src/lib/crypt_ops/digestset.h
|
||||
|
@ -142,7 +142,7 @@
|
||||
#include "or/node_st.h"
|
||||
#include "or/origin_circuit_st.h"
|
||||
|
||||
#include "lib/container/bloomfilt.h"
|
||||
#include "lib/crypt_ops/digestset.h"
|
||||
|
||||
/** A list of existing guard selection contexts. */
|
||||
static smartlist_t *guard_contexts = NULL;
|
||||
|
@ -137,7 +137,7 @@
|
||||
#include "or/routerlist_st.h"
|
||||
#include "or/vote_routerstatus_st.h"
|
||||
|
||||
#include "lib/container/bloomfilt.h"
|
||||
#include "lib/crypt_ops/digestset.h"
|
||||
|
||||
// #define DEBUG_ROUTERLIST
|
||||
|
||||
|
@ -29,7 +29,7 @@
|
||||
#include "or/cell_st.h"
|
||||
#include "or/or_circuit_st.h"
|
||||
|
||||
#include "lib/container/bloomfilt.h"
|
||||
#include "lib/crypt_ops/digestset.h"
|
||||
|
||||
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_PROCESS_CPUTIME_ID)
|
||||
static uint64_t nanostart;
|
||||
@ -376,7 +376,7 @@ bench_dmap(void)
|
||||
crypto_rand(d, 20);
|
||||
smartlist_add(sl2, tor_memdup(d, 20));
|
||||
}
|
||||
printf("nbits=%d\n", ds->mask+1);
|
||||
//printf("nbits=%d\n", ds->mask+1);
|
||||
|
||||
reset_perftime();
|
||||
|
||||
|
@ -10,8 +10,8 @@
|
||||
#include "test/test.h"
|
||||
|
||||
#include "lib/container/bitarray.h"
|
||||
#include "lib/container/bloomfilt.h"
|
||||
#include "lib/container/order.h"
|
||||
#include "lib/crypt_ops/digestset.h"
|
||||
|
||||
/** Helper: return a tristate based on comparing the strings in *<b>a</b> and
|
||||
* *<b>b</b>. */
|
||||
|
Loading…
Reference in New Issue
Block a user