mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-10 13:13:44 +01:00
Add an address-set backend using a bloom filter.
We're going to need this to make our anti-DoS code (see 24902) more robust.
This commit is contained in:
parent
a2aaf9509b
commit
46bd2aed91
@ -1200,6 +1200,28 @@ tor_addr_hash(const tor_addr_t *addr)
|
||||
}
|
||||
}
|
||||
|
||||
/** As tor_addr_hash, but use a particular siphash key. */
|
||||
uint64_t
|
||||
tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr)
|
||||
{
|
||||
/* This is duplicate code with tor_addr_hash, since this function needs to
|
||||
* be backportable all the way to 0.2.9. */
|
||||
|
||||
switch (tor_addr_family(addr)) {
|
||||
case AF_INET:
|
||||
return siphash24(&addr->addr.in_addr.s_addr, 4, key);
|
||||
case AF_UNSPEC:
|
||||
return 0x4e4d5342;
|
||||
case AF_INET6:
|
||||
return siphash24(&addr->addr.in6_addr.s6_addr, 16, key);
|
||||
default:
|
||||
/* LCOV_EXCL_START */
|
||||
tor_fragile_assert();
|
||||
return 0;
|
||||
/* LCOV_EXCL_END */
|
||||
}
|
||||
}
|
||||
|
||||
/** Return a newly allocated string with a representation of <b>addr</b>. */
|
||||
char *
|
||||
tor_addr_to_str_dup(const tor_addr_t *addr)
|
||||
|
@ -228,6 +228,8 @@ int tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2,
|
||||
#define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT))
|
||||
|
||||
uint64_t tor_addr_hash(const tor_addr_t *addr);
|
||||
struct sipkey;
|
||||
uint64_t tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr);
|
||||
int tor_addr_is_v4(const tor_addr_t *addr);
|
||||
int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening,
|
||||
const char *filename, int lineno);
|
||||
|
120
src/common/address_set.c
Normal file
120
src/common/address_set.c
Normal file
@ -0,0 +1,120 @@
|
||||
/* Copyright (c) 2018, The Tor Project, Inc. */
|
||||
/* See LICENSE for licensing information */
|
||||
|
||||
/**
|
||||
* \file address_set.c
|
||||
* \brief Implementation for a set of addresses.
|
||||
*
|
||||
* This module was first written on a semi-emergency basis to improve the
|
||||
* robustness of the anti-DoS module. As such, it's written in a pretty
|
||||
* conservative way, and should be susceptible to improvement later on.
|
||||
**/
|
||||
|
||||
#include "orconfig.h"
|
||||
#include "address_set.h"
|
||||
#include "address.h"
|
||||
#include "compat.h"
|
||||
#include "container.h"
|
||||
#include "crypto.h"
|
||||
#include "util.h"
|
||||
#include "siphash.h"
|
||||
|
||||
/** How many 64-bit siphash values to extract per address */
|
||||
#define N_HASHES 2
|
||||
/** How many bloom-filter bits we set per address. This is twice the N_HASHES
|
||||
* value, since we split the siphash outcome into two 32-bit values. */
|
||||
#define N_BITS_PER_ITEM (N_HASHES * 2)
|
||||
|
||||
/* XXXX This code is largely duplicated with digestset_t. We should merge
|
||||
* them together into a common bloom-filter implementation. I'm keeping
|
||||
* them separate for now, though, since this module needs to be backported
|
||||
* all the way to 0.2.9.
|
||||
*
|
||||
* The main difference between digestset_t and this code is that we use
|
||||
* independent siphashes rather than messing around with bit-shifts. The
|
||||
* approach here is probably more sound, and we should prefer it if&when we
|
||||
* unify the implementations.
|
||||
**/
|
||||
|
||||
struct address_set_t {
|
||||
/** siphash keys to make N_HASHES independent hashes for each address. */
|
||||
struct sipkey key[N_HASHES];
|
||||
int mask; /**< One less than the number of bits in <b>ba</b>; always one less
|
||||
* than a power of two. */
|
||||
bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
|
||||
};
|
||||
|
||||
/**
|
||||
* Allocate and return an address_set, suitable for holding up to
|
||||
* <b>max_address_guess</b> distinct values.
|
||||
*/
|
||||
address_set_t *
|
||||
address_set_new(int max_addresses_guess)
|
||||
{
|
||||
/* See digestset_new() for rationale on this equation. */
|
||||
int n_bits = 1u << (tor_log2(max_addresses_guess)+5);
|
||||
|
||||
address_set_t *set = tor_malloc_zero(sizeof(address_set_t));
|
||||
set->mask = n_bits - 1;
|
||||
set->ba = bitarray_init_zero(n_bits);
|
||||
crypto_rand((char*) set->key, sizeof(set->key));
|
||||
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release all storage associated with <b>set</b>
|
||||
*/
|
||||
void
|
||||
address_set_free(address_set_t *set)
|
||||
{
|
||||
if (! set)
|
||||
return;
|
||||
|
||||
bitarray_free(set->ba);
|
||||
tor_free(set);
|
||||
}
|
||||
|
||||
/** Map the hash value 'val' to a bit index for 'set' by masking it with the
 * set's mask. */
#define BIT(set, val) ((val) & ((set)->mask))
|
||||
|
||||
/**
|
||||
* Add <b>addr</b> to <b>set</b>.
|
||||
*
|
||||
* All future queries for <b>addr</b> in set will return true. Removing
|
||||
* items is not possible.
|
||||
*/
|
||||
void
|
||||
address_set_add(address_set_t *set, const struct tor_addr_t *addr)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N_HASHES; ++i) {
|
||||
uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
|
||||
uint32_t high_bits = (uint32_t)(h >> 32);
|
||||
uint32_t low_bits = (uint32_t)(h);
|
||||
bitarray_set(set->ba, BIT(set, high_bits));
|
||||
bitarray_set(set->ba, BIT(set, low_bits));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if <b>addr</b> if a member of <b>set</b>. (And probably,
|
||||
* return false if <b>addr</b> is not a member of set.)
|
||||
*/
|
||||
int
|
||||
address_set_probably_contains(address_set_t *set,
|
||||
const struct tor_addr_t *addr)
|
||||
{
|
||||
int i, matches = 0;
|
||||
for (i = 0; i < N_HASHES; ++i) {
|
||||
uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
|
||||
uint32_t high_bits = (uint32_t)(h >> 32);
|
||||
uint32_t low_bits = (uint32_t)(h);
|
||||
// Note that !! is necessary here, since bitarray_is_set does not
|
||||
// necessarily return 1 on true.
|
||||
matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
|
||||
matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
|
||||
}
|
||||
return matches == N_BITS_PER_ITEM;
|
||||
}
|
||||
|
33
src/common/address_set.h
Normal file
33
src/common/address_set.h
Normal file
@ -0,0 +1,33 @@
|
||||
/* Copyright (c) 2018, The Tor Project, Inc. */
|
||||
/* See LICENSE for licensing information */
|
||||
|
||||
/**
|
||||
* \file address_set.h
|
||||
* \brief Types to handle sets of addresses.
|
||||
*
|
||||
* This module was first written on a semi-emergency basis to improve the
|
||||
* robustness of the anti-DoS module. As such, it's written in a pretty
|
||||
* conservative way, and should be susceptible to improvement later on.
|
||||
**/
|
||||
|
||||
#ifndef TOR_ADDRESS_SET_H
|
||||
#define TOR_ADDRESS_SET_H
|
||||
|
||||
#include "orconfig.h"
|
||||
|
||||
/**
|
||||
* An address_set_t represents a set of tor_addr_t values. The implementation
|
||||
* is probabilistic: false negatives cannot occur but false positives are
|
||||
* possible.
|
||||
*/
|
||||
typedef struct address_set_t address_set_t;
|
||||
struct tor_addr_t;
|
||||
|
||||
address_set_t *address_set_new(int max_addresses_guess);
|
||||
void address_set_free(address_set_t *set);
|
||||
void address_set_add(address_set_t *set, const struct tor_addr_t *addr);
|
||||
int address_set_probably_contains(address_set_t *set,
|
||||
const struct tor_addr_t *addr);
|
||||
|
||||
#endif
|
||||
|
@ -80,6 +80,7 @@ src_common_libor_ctime_testing_a_CFLAGS = @CFLAGS_CONSTTIME@ $(TEST_CFLAGS)
|
||||
|
||||
LIBOR_A_SRC = \
|
||||
src/common/address.c \
|
||||
src/common/address_set.c \
|
||||
src/common/backtrace.c \
|
||||
src/common/compat.c \
|
||||
src/common/compat_threads.c \
|
||||
@ -135,6 +136,7 @@ src_common_libor_event_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
|
||||
|
||||
COMMONHEADERS = \
|
||||
src/common/address.h \
|
||||
src/common/address_set.h \
|
||||
src/common/backtrace.h \
|
||||
src/common/aes.h \
|
||||
src/common/ciphers.inc \
|
||||
|
Loading…
Reference in New Issue
Block a user