Add an address-set backend using a bloom filter.

We're going to need this to make our anti-DoS code (see 24902) more
robust.
This commit is contained in:
Nick Mathewson 2018-02-07 09:49:35 -05:00 committed by David Goulet
parent a2aaf9509b
commit 46bd2aed91
5 changed files with 179 additions and 0 deletions

View File

@ -1200,6 +1200,28 @@ tor_addr_hash(const tor_addr_t *addr)
} }
} }
/** As tor_addr_hash, but use a particular siphash key. */
uint64_t
tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr)
{
/* This is duplicate code with tor_addr_hash, since this function needs to
* be backportable all the way to 0.2.9. */
switch (tor_addr_family(addr)) {
case AF_INET:
return siphash24(&addr->addr.in_addr.s_addr, 4, key);
case AF_UNSPEC:
return 0x4e4d5342;
case AF_INET6:
return siphash24(&addr->addr.in6_addr.s6_addr, 16, key);
default:
/* LCOV_EXCL_START */
tor_fragile_assert();
return 0;
/* LCOV_EXCL_END */
}
}
/** Return a newly allocated string with a representation of <b>addr</b>. */ /** Return a newly allocated string with a representation of <b>addr</b>. */
char * char *
tor_addr_to_str_dup(const tor_addr_t *addr) tor_addr_to_str_dup(const tor_addr_t *addr)

View File

@ -228,6 +228,8 @@ int tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2,
#define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT)) #define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT))
uint64_t tor_addr_hash(const tor_addr_t *addr); uint64_t tor_addr_hash(const tor_addr_t *addr);
struct sipkey;
uint64_t tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr);
int tor_addr_is_v4(const tor_addr_t *addr); int tor_addr_is_v4(const tor_addr_t *addr);
int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening, int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening,
const char *filename, int lineno); const char *filename, int lineno);

120
src/common/address_set.c Normal file
View File

@ -0,0 +1,120 @@
/* Copyright (c) 2018, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
* \file address_set.c
* \brief Implementation for a set of addresses.
*
* This module was first written on a semi-emergency basis to improve the
* robustness of the anti-DoS module. As such, it's written in a pretty
* conservative way, and should be susceptible to improvement later on.
**/
#include "orconfig.h"
#include "address_set.h"
#include "address.h"
#include "compat.h"
#include "container.h"
#include "crypto.h"
#include "util.h"
#include "siphash.h"
/** How many 64-bit siphash values to extract per address */
#define N_HASHES 2
/** How many bloom-filter bits we set per address. This is twice the N_HASHES
* value, since we split the siphash outcome two 32-bit values. */
#define N_BITS_PER_ITEM (N_HASHES * 2)
/* XXXX This code is largely duplicated with digestset_t. We should merge
* them together into a common bloom-filter implementation. I'm keeping
* them separate for now, though, since this module needs to be backported
* all the way to 0.2.9.
*
* The main difference between digestset_t and this code is that we use
* independent siphashes rather than messing around with bit-shifts. The
* approach here is probably more sound, and we should prefer it if&when we
* unify the implementations.
**/
struct address_set_t {
/** siphash keys to make N_HASHES independent hashes for each address. */
struct sipkey key[N_HASHES];
int mask; /**< One less than the number of bits in <b>ba</b>; always one less
* than a power of two. */
bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
};
/**
* Allocate and return an address_set, suitable for holding up to
* <b>max_address_guess</b> distinct values.
*/
address_set_t *
address_set_new(int max_addresses_guess)
{
/* See digestset_new() for rationale on this equation. */
int n_bits = 1u << (tor_log2(max_addresses_guess)+5);
address_set_t *set = tor_malloc_zero(sizeof(address_set_t));
set->mask = n_bits - 1;
set->ba = bitarray_init_zero(n_bits);
crypto_rand((char*) set->key, sizeof(set->key));
return set;
}
/**
* Release all storage associated with <b>set</b>
*/
void
address_set_free(address_set_t *set)
{
if (! set)
return;
bitarray_free(set->ba);
tor_free(set);
}
/** Yield the bit index corresponding to 'val' for set. */
#define BIT(set, val) ((val) & (set)->mask)
/**
* Add <b>addr</b> to <b>set</b>.
*
* All future queries for <b>addr</b> in set will return true. Removing
* items is not possible.
*/
void
address_set_add(address_set_t *set, const struct tor_addr_t *addr)
{
int i;
for (i = 0; i < N_HASHES; ++i) {
uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
uint32_t high_bits = (uint32_t)(h >> 32);
uint32_t low_bits = (uint32_t)(h);
bitarray_set(set->ba, BIT(set, high_bits));
bitarray_set(set->ba, BIT(set, low_bits));
}
}
/**
* Return true if <b>addr</b> if a member of <b>set</b>. (And probably,
* return false if <b>addr</b> is not a member of set.)
*/
int
address_set_probably_contains(address_set_t *set,
const struct tor_addr_t *addr)
{
int i, matches = 0;
for (i = 0; i < N_HASHES; ++i) {
uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
uint32_t high_bits = (uint32_t)(h >> 32);
uint32_t low_bits = (uint32_t)(h);
// Note that !! is necessary here, since bitarray_is_set does not
// necessarily return 1 on true.
matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
}
return matches == N_BITS_PER_ITEM;
}

33
src/common/address_set.h Normal file
View File

@ -0,0 +1,33 @@
/* Copyright (c) 2018, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
* \file addressset.h
* \brief Types to handle sets of addresses.
*
* This module was first written on a semi-emergency basis to improve the
* robustness of the anti-DoS module. As such, it's written in a pretty
* conservative way, and should be susceptible to improvement later on.
**/
#ifndef TOR_ADDRESS_SET_H
#define TOR_ADDRESS_SET_H
#include "orconfig.h"
/**
* An address_set_t represents a set of tor_addr_t values. The implementation
* is probabilistic: false negatives cannot occur but false positives are
* possible.
*/
typedef struct address_set_t address_set_t;
struct tor_addr_t;
address_set_t *address_set_new(int max_addresses_guess);
void address_set_free(address_set_t *set);
void address_set_add(address_set_t *set, const struct tor_addr_t *addr);
int address_set_probably_contains(address_set_t *set,
const struct tor_addr_t *addr);
#endif

View File

@ -80,6 +80,7 @@ src_common_libor_ctime_testing_a_CFLAGS = @CFLAGS_CONSTTIME@ $(TEST_CFLAGS)
LIBOR_A_SRC = \ LIBOR_A_SRC = \
src/common/address.c \ src/common/address.c \
src/common/address_set.c \
src/common/backtrace.c \ src/common/backtrace.c \
src/common/compat.c \ src/common/compat.c \
src/common/compat_threads.c \ src/common/compat_threads.c \
@ -135,6 +136,7 @@ src_common_libor_event_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
COMMONHEADERS = \ COMMONHEADERS = \
src/common/address.h \ src/common/address.h \
src/common/address_set.h \
src/common/backtrace.h \ src/common/backtrace.h \
src/common/aes.h \ src/common/aes.h \
src/common/ciphers.inc \ src/common/ciphers.inc \