Add better support to obfuscate statistics.

This commit is contained in:
Karsten Loesing 2014-12-08 15:00:58 +01:00
parent 447ece46f5
commit 7cd53b75c1
3 changed files with 102 additions and 0 deletions

View File

@ -513,6 +513,51 @@ round_uint64_to_next_multiple_of(uint64_t number, uint64_t divisor)
return number; return number;
} }
/** Round <b>number</b> to a multiple of <b>divisor</b>, which must be
 * positive (asserted).
 *
 * The result is normally the smallest multiple of <b>divisor</b> that is
 * not below <b>number</b>; for example 99 with divisor 7 gives 105, and
 * -99 with divisor 7 gives -98.  The one exception: when <b>number</b> is
 * so close to INT64_MAX that the next multiple might not fit, the value is
 * rounded <em>down</em> to the previous multiple instead, so the result
 * always stays within int64_t. */
int64_t
round_int64_to_next_multiple_of(int64_t number, int64_t divisor)
{
  int64_t bumped = number;

  tor_assert(divisor > 0);

  /* Only non-negative values need a bump before the remainder is stripped:
   * C's % truncates toward zero, so for negative values removing the
   * remainder already moves them up to the next multiple.  The bump is
   * skipped when number + divisor - 1 would exceed INT64_MAX. */
  if (number >= 0 && number <= INT64_MAX - divisor + 1)
    bumped = number + (divisor - 1);

  return bumped - (bumped % divisor);
}
/** Map <b>p</b>, a value drawn uniformly from [0.0, 1.0[, onto the Laplace
 * distribution with location <b>mu</b> and scale <b>b</b>, and return the
 * resulting sample.
 *
 * <b>p</b> outside [0.0, 1.0[ triggers an assertion.  p == 0.5 yields
 * <b>mu</b>; values below 0.5 land below <b>mu</b> and values above 0.5
 * land above it (for positive <b>b</b>).  At p == 0.0 the logarithm's
 * argument is zero, so callers must be prepared for an infinite result. */
double
sample_laplace_distribution(double mu, double b, double p)
{
  double signed_scale;
  double log_term;

  tor_assert(p >= 0.0 && p < 1.0);

  /* Inverse cumulative distribution function of the Laplace distribution:
   *   mu - b * sgn(p - 0.5) * ln(1 - 2 * |p - 0.5|)
   * see http://en.wikipedia.org/wiki/Laplace_distribution
   * Note that p == 0.5 takes the "negative" branch; the logarithm is 0
   * there, so the sign does not matter. */
  if (p > 0.5)
    signed_scale = b;
  else
    signed_scale = -b;

  log_term = tor_mathlog(1.0 - 2.0 * fabs(p - 0.5));

  return mu - signed_scale * log_term;
}
/** Add Laplace-distributed noise to <b>signal</b> and return the result,
 * clipped to [INT64_MIN, INT64_MAX].
 *
 * The noise is taken from a Laplace distribution with mu = 0 and
 * b = <b>delta_f</b>/<b>epsilon</b>, sampled at the caller-provided
 * <b>random</b> value, which must lie in [0.0, 1.0[.  The sample is
 * truncated toward zero to an integer before it is added.
 *
 * The sample may be infinite (e.g. for <b>random</b> == 0.0) or otherwise
 * outside the range of int64_t.  Converting such a double to an integer is
 * undefined behavior in C, so the sample is saturated explicitly first. */
int64_t
add_laplace_noise(int64_t signal, double random, double delta_f,
                  double epsilon)
{
  /* 2^63.  Every double strictly between -2^63 and 2^63 truncates to a
   * valid int64_t.  (double)INT64_MAX cannot be used as the bound because
   * it rounds up to exactly this value, which is already out of range. */
  const double int64_limit = 9223372036854775808.0;
  const double sample = sample_laplace_distribution(
                            0.0, /* just add noise, no further signal */
                            delta_f / epsilon, random);
  int64_t noise;

  if (sample > -int64_limit && sample < int64_limit)
    noise = (int64_t) sample; /* in range: truncation is well defined */
  else if (sample >= int64_limit)
    noise = INT64_MAX;
  else if (sample <= -int64_limit)
    noise = INT64_MIN;
  else
    noise = 0; /* NaN, only reachable with degenerate delta_f/epsilon */

  /* Clip signal + noise without ever evaluating an overflowing sum. */
  if (noise > 0 && INT64_MAX - noise < signal)
    return INT64_MAX;
  else if (noise < 0 && INT64_MIN - noise > signal)
    return INT64_MIN;
  else
    return signal + noise;
}
/** Return the number of bits set in <b>v</b>. */ /** Return the number of bits set in <b>v</b>. */
int int
n_bits_set_u8(uint8_t v) n_bits_set_u8(uint8_t v)

View File

@ -172,6 +172,10 @@ uint64_t round_to_power_of_2(uint64_t u64);
unsigned round_to_next_multiple_of(unsigned number, unsigned divisor); unsigned round_to_next_multiple_of(unsigned number, unsigned divisor);
uint32_t round_uint32_to_next_multiple_of(uint32_t number, uint32_t divisor); uint32_t round_uint32_to_next_multiple_of(uint32_t number, uint32_t divisor);
uint64_t round_uint64_to_next_multiple_of(uint64_t number, uint64_t divisor); uint64_t round_uint64_to_next_multiple_of(uint64_t number, uint64_t divisor);
/* Round number to a multiple of divisor; divisor must be positive. */
int64_t round_int64_to_next_multiple_of(int64_t number, int64_t divisor);
/* Turn p, uniform in [0.0, 1.0[, into a Laplace sample with location mu
 * and scale b. */
double sample_laplace_distribution(double mu, double b, double p);
/* Add Laplace noise with mu = 0 and b = delta_f/epsilon to signal, using
 * random in [0.0, 1.0[ as the source of randomness. */
int64_t add_laplace_noise(int64_t signal, double random, double delta_f,
double epsilon);
int n_bits_set_u8(uint8_t v); int n_bits_set_u8(uint8_t v);
/* Compute the CEIL of <b>a</b> divided by <b>b</b>, for nonnegative <b>a</b> /* Compute the CEIL of <b>a</b> divided by <b>b</b>, for nonnegative <b>a</b>

View File

@ -4619,6 +4619,58 @@ test_util_round_to_next_multiple_of(void *arg)
tt_assert(round_uint64_to_next_multiple_of(99,7) == 105); tt_assert(round_uint64_to_next_multiple_of(99,7) == 105);
tt_assert(round_uint64_to_next_multiple_of(99,9) == 99); tt_assert(round_uint64_to_next_multiple_of(99,9) == 99);
tt_assert(round_int64_to_next_multiple_of(0,1) == 0);
tt_assert(round_int64_to_next_multiple_of(0,7) == 0);
tt_assert(round_int64_to_next_multiple_of(99,1) == 99);
tt_assert(round_int64_to_next_multiple_of(99,7) == 105);
tt_assert(round_int64_to_next_multiple_of(99,9) == 99);
tt_assert(round_int64_to_next_multiple_of(-99,1) == -99);
tt_assert(round_int64_to_next_multiple_of(-99,7) == -98);
tt_assert(round_int64_to_next_multiple_of(-99,9) == -99);
tt_assert(round_int64_to_next_multiple_of(INT64_MIN,2) == INT64_MIN);
tt_assert(round_int64_to_next_multiple_of(INT64_MAX,2) ==
INT64_MAX-INT64_MAX%2);
done:
;
}
/** Unit test for sample_laplace_distribution() and add_laplace_noise():
 * compare their results against reference values computed with SciPy. */
static void
test_util_laplace(void *arg)
{
  /* Sample values produced using Python's SciPy:
   *
   * >>> from scipy.stats import laplace
   * >>> laplace.ppf([-0.01, 0.0, 0.01, 0.5, 0.51, 0.99, 1.0, 1.01],
   ...             loc = 24, scale = 24)
   * array([          nan,          -inf,  -69.88855213,   24.        ,
   *         24.48486498,  117.88855213,           inf,           nan])
   */
  const double mu = 24.0, b = 24.0;
  const double delta_f = 15.0, epsilon = 0.3; /* b = 15.0 / 0.3 = 50.0 */
  (void)arg;

  tt_assert(isinf(sample_laplace_distribution(mu, b, 0.0)));
  test_feq(-69.88855213, sample_laplace_distribution(mu, b, 0.01));
  test_feq(24.0, sample_laplace_distribution(mu, b, 0.5));
  test_feq(24.48486498, sample_laplace_distribution(mu, b, 0.51));
  test_feq(117.88855213, sample_laplace_distribution(mu, b, 0.99));

  /* >>> laplace.ppf([0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99],
   * ...             loc = 0, scale = 50)
   * array([         -inf,  -80.47189562,  -34.65735903,    0.        ,
   *         34.65735903,   80.47189562,  195.60115027])
   */
  /* add_laplace_noise() returns int64_t, so the expectation must be built
   * from INT64_MIN: LONG_MIN is only 32 bits wide on some platforms. */
  tt_assert(INT64_MIN + 20 ==
            add_laplace_noise(20, 0.0, delta_f, epsilon));
  tt_assert(-60 == add_laplace_noise(20, 0.1, delta_f, epsilon));
  tt_assert(-14 == add_laplace_noise(20, 0.25, delta_f, epsilon));
  tt_assert(20 == add_laplace_noise(20, 0.5, delta_f, epsilon));
  tt_assert(54 == add_laplace_noise(20, 0.75, delta_f, epsilon));
  tt_assert(100 == add_laplace_noise(20, 0.9, delta_f, epsilon));
  tt_assert(215 == add_laplace_noise(20, 0.99, delta_f, epsilon));

 done:
  ;
}
@ -4880,6 +4932,7 @@ struct testcase_t util_tests[] = {
UTIL_LEGACY(strtok), UTIL_LEGACY(strtok),
UTIL_LEGACY(di_ops), UTIL_LEGACY(di_ops),
UTIL_TEST(round_to_next_multiple_of, 0), UTIL_TEST(round_to_next_multiple_of, 0),
UTIL_TEST(laplace, 0),
UTIL_TEST(strclear, 0), UTIL_TEST(strclear, 0),
UTIL_TEST(find_str_at_start_of_line, 0), UTIL_TEST(find_str_at_start_of_line, 0),
UTIL_TEST(string_is_C_identifier, 0), UTIL_TEST(string_is_C_identifier, 0),