From 0e97c8e23e2572c14dd0f4f4fbfca77ee8a48be2 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 7 Feb 2014 17:38:16 -0500 Subject: [PATCH] Siphash-2-4 is now our hash in nearly all cases. I've made an exception for cases where I'm sure that users can't influence the inputs. This is likely to cause a slowdown somewhere, but it's safer to siphash everything and *then* look for cases to optimize. This patch doesn't actually get us any _benefit_ from siphash yet, since we don't really randomize the key at any point. --- src/common/address.c | 38 +++++++++++++++++++++++++++++++------- src/common/address.h | 3 ++- src/common/container.c | 10 ++-------- src/common/container.h | 21 +++++++++++---------- src/ext/siphash.h | 1 + src/or/channel.c | 7 +------ src/or/dns.c | 2 +- src/or/fp_pair.c | 13 ++----------- src/or/geoip.c | 8 +++++--- src/or/microdesc.c | 7 +------ src/or/nodelist.c | 9 +-------- src/or/policies.c | 30 +++++++++++++++++------------- 12 files changed, 75 insertions(+), 74 deletions(-) diff --git a/src/common/address.c b/src/common/address.c index b9f2d93154..69049fa0af 100644 --- a/src/common/address.c +++ b/src/common/address.c @@ -874,6 +874,32 @@ tor_addr_copy(tor_addr_t *dest, const tor_addr_t *src) memcpy(dest, src, sizeof(tor_addr_t)); } +/** Copy a tor_addr_t from src to dest, taking extra care to + * copy only the well-defined portions. Used for computing hashes of + * addresses. 
+ */ +void +tor_addr_copy_tight(tor_addr_t *dest, const tor_addr_t *src) +{ + tor_assert(src != dest); + tor_assert(src); + tor_assert(dest); + memset(dest, 0, sizeof(tor_addr_t)); + dest->family = src->family; + switch (tor_addr_family(src)) + { + case AF_INET: + dest->addr.in_addr.s_addr = src->addr.in_addr.s_addr; + break; + case AF_INET6: + memcpy(dest->addr.in6_addr.s6_addr, src->addr.in6_addr.s6_addr, 16); + case AF_UNSPEC: + break; + default: + tor_fragile_assert(); + } +} + /** Given two addresses addr1 and addr2, return 0 if the two * addresses are equivalent under the mask mbits, less than 0 if addr1 * precedes addr2, and greater than 0 otherwise. @@ -995,19 +1021,17 @@ tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2, } } -/** Return a hash code based on the address addr */ -unsigned int +/** Return a hash code based on the address addr. DOCDOC extra */ +uint64_t tor_addr_hash(const tor_addr_t *addr) { switch (tor_addr_family(addr)) { case AF_INET: - return tor_addr_to_ipv4h(addr); + return siphash24g(&addr->addr.in_addr.s_addr, 4); case AF_UNSPEC: return 0x4e4d5342; - case AF_INET6: { - const uint32_t *u = tor_addr_to_in6_addr32(addr); - return u[0] + u[1] + u[2] + u[3]; - } + case AF_INET6: + return siphash24g(&addr->addr.in6_addr.s6_addr, 16); default: tor_fragile_assert(); return 0; diff --git a/src/common/address.h b/src/common/address.h index 77e5855346..d41c2f570f 100644 --- a/src/common/address.h +++ b/src/common/address.h @@ -167,7 +167,7 @@ int tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2, * "exactly". 
*/ #define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT)) -unsigned int tor_addr_hash(const tor_addr_t *addr); +uint64_t tor_addr_hash(const tor_addr_t *addr); int tor_addr_is_v4(const tor_addr_t *addr); int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening, const char *filename, int lineno); @@ -192,6 +192,7 @@ const char * tor_addr_to_str(char *dest, const tor_addr_t *addr, size_t len, int decorate); int tor_addr_parse(tor_addr_t *addr, const char *src); void tor_addr_copy(tor_addr_t *dest, const tor_addr_t *src); +void tor_addr_copy_tight(tor_addr_t *dest, const tor_addr_t *src); void tor_addr_from_ipv4n(tor_addr_t *dest, uint32_t v4addr); /** Set dest to the IPv4 address encoded in v4addr in host * order. */ diff --git a/src/common/container.c b/src/common/container.c index 476dc82913..f489430ca4 100644 --- a/src/common/container.c +++ b/src/common/container.c @@ -1004,7 +1004,7 @@ strmap_entries_eq(const strmap_entry_t *a, const strmap_entry_t *b) static INLINE unsigned int strmap_entry_hash(const strmap_entry_t *a) { - return ht_string_hash(a->key); + return (unsigned) siphash24g(a->key, strlen(a->key)); } /** Helper: compare digestmap_entry_t objects by key value. 
*/ @@ -1018,13 +1018,7 @@ digestmap_entries_eq(const digestmap_entry_t *a, const digestmap_entry_t *b) static INLINE unsigned int digestmap_entry_hash(const digestmap_entry_t *a) { -#if SIZEOF_INT != 8 - const uint32_t *p = (const uint32_t*)a->key; - return p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4]; -#else - const uint64_t *p = (const uint64_t*)a->key; - return p[0] ^ p[1]; -#endif + return (unsigned) siphash24g(a->key, DIGEST_LEN); } HT_PROTOTYPE(strmap_impl, strmap_entry_t, node, strmap_entry_hash, diff --git a/src/common/container.h b/src/common/container.h index 1bcc540665..a4691a76c6 100644 --- a/src/common/container.h +++ b/src/common/container.h @@ -7,6 +7,7 @@ #define TOR_CONTAINER_H #include "util.h" +#include "siphash.h" /** A resizeable list of pointers, with associated helpful functionality. * @@ -610,11 +611,11 @@ typedef struct { static INLINE void digestset_add(digestset_t *set, const char *digest) { - const uint32_t *p = (const uint32_t *)digest; - const uint32_t d1 = p[0] + (p[1]>>16); - const uint32_t d2 = p[1] + (p[2]>>16); - const uint32_t d3 = p[2] + (p[3]>>16); - const uint32_t d4 = p[3] + (p[0]>>16); + const uint64_t x = siphash24g(digest, 20); + const uint32_t d1 = (uint32_t) x; + const uint32_t d2 = (uint32_t)( (x>>16) + x); + const uint32_t d3 = (uint32_t)( (x>>32) + x); + const uint32_t d4 = (uint32_t)( (x>>48) + x); bitarray_set(set->ba, BIT(d1)); bitarray_set(set->ba, BIT(d2)); bitarray_set(set->ba, BIT(d3)); @@ -626,11 +627,11 @@ digestset_add(digestset_t *set, const char *digest) static INLINE int digestset_contains(const digestset_t *set, const char *digest) { - const uint32_t *p = (const uint32_t *)digest; - const uint32_t d1 = p[0] + (p[1]>>16); - const uint32_t d2 = p[1] + (p[2]>>16); - const uint32_t d3 = p[2] + (p[3]>>16); - const uint32_t d4 = p[3] + (p[0]>>16); + const uint64_t x = siphash24g(digest, 20); + const uint32_t d1 = (uint32_t) x; + const uint32_t d2 = (uint32_t)( (x>>16) + x); + const uint32_t d3 = (uint32_t)( (x>>32) + x); 
+ const uint32_t d4 = (uint32_t)( (x>>48) + x); return bitarray_is_set(set->ba, BIT(d1)) && bitarray_is_set(set->ba, BIT(d2)) && bitarray_is_set(set->ba, BIT(d3)) && diff --git a/src/ext/siphash.h b/src/ext/siphash.h index 964fe7df99..d9b34b8980 100644 --- a/src/ext/siphash.h +++ b/src/ext/siphash.h @@ -1,5 +1,6 @@ #ifndef SIPHASH_H #define SIPHASH_H + struct sipkey { uint64_t k0; uint64_t k1; diff --git a/src/or/channel.c b/src/or/channel.c index a345bab20c..9f6887588e 100644 --- a/src/or/channel.c +++ b/src/or/channel.c @@ -95,12 +95,7 @@ typedef struct channel_idmap_entry_s { static INLINE unsigned channel_idmap_hash(const channel_idmap_entry_t *ent) { - const unsigned *a = (const unsigned *)ent->digest; -#if SIZEOF_INT == 4 - return a[0] ^ a[1] ^ a[2] ^ a[3] ^ a[4]; -#elif SIZEOF_INT == 8 - return a[0] ^ a[1]; -#endif + return (unsigned) siphash24g(ent->digest, DIGEST_LEN); } static INLINE int diff --git a/src/or/dns.c b/src/or/dns.c index a1fe0de1d7..a88a46eb71 100644 --- a/src/or/dns.c +++ b/src/or/dns.c @@ -239,7 +239,7 @@ cached_resolves_eq(cached_resolve_t *a, cached_resolve_t *b) static INLINE unsigned int cached_resolve_hash(cached_resolve_t *a) { - return ht_string_hash(a->address); + return (unsigned) siphash24g((const uint8_t*)a->address, strlen(a->address)); } HT_PROTOTYPE(cache_map, cached_resolve_t, node, cached_resolve_hash, diff --git a/src/or/fp_pair.c b/src/or/fp_pair.c index 4d8a835c83..55e4c89a42 100644 --- a/src/or/fp_pair.c +++ b/src/or/fp_pair.c @@ -32,17 +32,8 @@ fp_pair_map_entries_eq(const fp_pair_map_entry_t *a, static INLINE unsigned int fp_pair_map_entry_hash(const fp_pair_map_entry_t *a) { - const uint32_t *p; - unsigned int hash; - - p = (const uint32_t *)(a->key.first); - /* Hashes are 20 bytes long, so 5 times uint32_t */ - hash = p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4]; - /* Now XOR in the second fingerprint */ - p = (const uint32_t *)(a->key.second); - hash ^= p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4]; - - return hash; + 
tor_assert(sizeof(a->key) == DIGEST_LEN*2); + return (unsigned) siphash24g(&a->key, DIGEST_LEN*2); } /* diff --git a/src/or/geoip.c b/src/or/geoip.c index dc4730c810..6088f5d194 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -486,10 +486,12 @@ static HT_HEAD(clientmap, clientmap_entry_t) client_history = static INLINE unsigned clientmap_entry_hash(const clientmap_entry_t *a) { - unsigned h = tor_addr_hash(&a->addr); + unsigned h = (unsigned) tor_addr_hash(&a->addr); + if (a->transport_name) - h += ht_string_hash(a->transport_name); - return ht_improve_hash(h); + h += (unsigned) siphash24g(a->transport_name, strlen(a->transport_name)); + + return h; } /** Hashtable helper: compare two clientmap_entry_t values for equality. */ static INLINE int diff --git a/src/or/microdesc.c b/src/or/microdesc.c index 11249910ca..8052ca998c 100644 --- a/src/or/microdesc.c +++ b/src/or/microdesc.c @@ -45,12 +45,7 @@ struct microdesc_cache_t { static INLINE unsigned int microdesc_hash_(microdesc_t *md) { - unsigned *d = (unsigned*)md->digest; -#if SIZEOF_INT == 4 - return d[0] ^ d[1] ^ d[2] ^ d[3] ^ d[4] ^ d[5] ^ d[6] ^ d[7]; -#else - return d[0] ^ d[1] ^ d[2] ^ d[3]; -#endif + return (unsigned) siphash24g(md->digest, sizeof(md->digest)); } /** Helper: compares a and for equality for hash-table purposes. 
*/ diff --git a/src/or/nodelist.c b/src/or/nodelist.c index 402fb2e96a..03fa836d4e 100644 --- a/src/or/nodelist.c +++ b/src/or/nodelist.c @@ -43,14 +43,7 @@ typedef struct nodelist_t { static INLINE unsigned int node_id_hash(const node_t *node) { -#if SIZEOF_INT == 4 - const uint32_t *p = (const uint32_t*)node->identity; - return p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4]; -#elif SIZEOF_INT == 8 - const uint64_t *p = (const uint32_t*)node->identity; - const uint32_t *p32 = (const uint32_t*)node->identity; - return p[0] ^ p[1] ^ p32[4]; -#endif + return (unsigned) siphash24g(node->identity, DIGEST_LEN); } static INLINE unsigned int diff --git a/src/or/policies.c b/src/or/policies.c index be4da55061..05377ec205 100644 --- a/src/or/policies.c +++ b/src/or/policies.c @@ -597,21 +597,25 @@ policy_eq(policy_map_ent_t *a, policy_map_ent_t *b) /** Return a hashcode for ent */ static unsigned int -policy_hash(policy_map_ent_t *ent) +policy_hash(const policy_map_ent_t *ent) { - addr_policy_t *a = ent->policy; - unsigned int r; - if (a->is_private) - r = 0x1234abcd; - else - r = tor_addr_hash(&a->addr); - r += a->prt_min << 8; - r += a->prt_max << 16; - r += a->maskbits; - if (a->policy_type == ADDR_POLICY_REJECT) - r ^= 0xffffffff; + const addr_policy_t *a = ent->policy; + addr_policy_t aa; + memset(&aa, 0, sizeof(aa)); - return r; + aa.prt_min = a->prt_min; + aa.prt_max = a->prt_max; + aa.maskbits = a->maskbits; + aa.policy_type = a->policy_type; + aa.is_private = a->is_private; + + if (a->is_private) { + aa.is_private = 1; + } else { + tor_addr_copy_tight(&aa.addr, &a->addr); + } + + return (unsigned) siphash24g(&aa, sizeof(aa)); } HT_PROTOTYPE(policy_map, policy_map_ent_t, node, policy_hash,