Write unit tests and fix issues they uncovered.

This commit is contained in:
Mike Perry 2009-08-27 23:28:20 -07:00
parent 04414830fe
commit b52bce91fc
4 changed files with 206 additions and 66 deletions

View File

@ -9,9 +9,13 @@
* \brief The actual details of building circuits. * \brief The actual details of building circuits.
**/ **/
// XXX: Move this noise to common/compat.c?
#include <math.h> #include <math.h>
// also use pow()
long int lround(double x); long int lround(double x);
double ln(double x);
double double
ln(double x) ln(double x)
@ -20,6 +24,8 @@ ln(double x)
} }
#undef log #undef log
#define CIRCUIT_PRIVATE
#include "or.h" #include "or.h"
#include "crypto.h" #include "crypto.h"
@ -81,16 +87,19 @@ static time_t start_of_month(time_t when);
* time units are milliseconds * time units are milliseconds
*/ */
int int
circuit_build_times_add_time(circuit_build_times_t *cbt, long time) circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time)
{ {
if (time > UINT16_MAX) { if (time > BUILD_TIME_MAX) {
log_notice(LD_CIRC, log_notice(LD_CIRC,
"Circuit build time of %ldms exceeds max. Capping at 65536ms", time); "Circuit build time of %ums exceeds max. Capping at 65536ms", time);
time = UINT16_MAX; time = BUILD_TIME_MAX;
} else if (time <= 0) {
log_err(LD_CIRC, "Circuit build time is %u!", time);
return -1;
} }
cbt->circuit_build_times[cbt->build_times_idx] = time; cbt->circuit_build_times[cbt->build_times_idx] = time;
cbt->build_times_idx = (cbt->build_times_idx + 1) % NCIRCUITS_TO_OBSERVE; cbt->build_times_idx = (cbt->build_times_idx + 1) % NCIRCUITS_TO_OBSERVE;
if (cbt->total_build_times + 1 < NCIRCUITS_TO_OBSERVE) if (cbt->total_build_times < NCIRCUITS_TO_OBSERVE)
cbt->total_build_times++; cbt->total_build_times++;
return 0; return 0;
@ -101,7 +110,7 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, long time)
*/ */
static void static void
circuit_build_times_create_histogram(circuit_build_times_t *cbt, circuit_build_times_create_histogram(circuit_build_times_t *cbt,
uint16_t *histogram) build_time_t *histogram)
{ {
int i, c; int i, c;
// calculate histogram // calculate histogram
@ -116,10 +125,11 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt,
/** /**
* Find maximum circuit build time * Find maximum circuit build time
*/ */
static uint16_t static build_time_t
circuit_build_times_max(circuit_build_times_t *cbt) circuit_build_times_max(circuit_build_times_t *cbt)
{ {
int i = 0, max_build_time = 0; int i = 0;
build_time_t max_build_time = 0;
for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
if (cbt->circuit_build_times[i] > max_build_time) if (cbt->circuit_build_times[i] > max_build_time)
max_build_time = cbt->circuit_build_times[i]; max_build_time = cbt->circuit_build_times[i];
@ -127,36 +137,39 @@ circuit_build_times_max(circuit_build_times_t *cbt)
return max_build_time; return max_build_time;
} }
static uint16_t static build_time_t
circuit_build_times_min(circuit_build_times_t *cbt) circuit_build_times_min(circuit_build_times_t *cbt)
{ {
int i = 0; int i = 0;
uint16_t min_build_time = UINT16_MAX; build_time_t min_build_time = BUILD_TIME_MAX;
for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */ if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */
cbt->circuit_build_times[i] < min_build_time) cbt->circuit_build_times[i] < min_build_time)
min_build_time = cbt->circuit_build_times[i]; min_build_time = cbt->circuit_build_times[i];
} }
if (min_build_time == UINT16_MAX) { if (min_build_time == BUILD_TIME_MAX) {
log_warn(LD_CIRC, "No build times less than UIN16_MAX!"); log_warn(LD_CIRC, "No build times less than BUILD_TIME_MAX!");
} }
return min_build_time; return min_build_time;
} }
/** /**
* output a histogram of current circuit build times * output a histogram of current circuit build times.
*
* XXX: Is do_unit the right way to do this unit test
* noise?
*/ */
void void
circuit_build_times_update_state(circuit_build_times_t *cbt, circuit_build_times_update_state(circuit_build_times_t *cbt,
or_state_t *state) or_state_t *state, int do_unit)
{ {
uint16_t max_build_time = 0, *histogram; build_time_t max_build_time = 0, *histogram;
int i = 0, nbins = 0; int i = 0, nbins = 0;
config_line_t **next, *line; config_line_t **next, *line;
max_build_time = circuit_build_times_max(cbt); max_build_time = circuit_build_times_max(cbt);
nbins = 1 + (max_build_time / BUILDTIME_BIN_WIDTH); nbins = 1 + (max_build_time / BUILDTIME_BIN_WIDTH);
histogram = tor_malloc_zero(nbins * sizeof(uint16_t)); histogram = tor_malloc_zero(nbins * sizeof(build_time_t));
circuit_build_times_create_histogram(cbt, histogram); circuit_build_times_create_histogram(cbt, histogram);
// write to state // write to state
@ -177,8 +190,11 @@ circuit_build_times_update_state(circuit_build_times_t *cbt,
histogram[i]); histogram[i]);
next = &(line->next); next = &(line->next);
} }
if (!get_options()->AvoidDiskWrites)
or_state_mark_dirty(get_or_state(), 0); if (!do_unit) {
if (!get_options()->AvoidDiskWrites)
or_state_mark_dirty(get_or_state(), 0);
}
if (histogram) tor_free(histogram); if (histogram) tor_free(histogram);
} }
@ -191,8 +207,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
int tot_values = 0, lines = 0; int tot_values = 0, lines = 0;
config_line_t *line; config_line_t *line;
msg = NULL; msg = NULL;
memset(cbt->circuit_build_times, 0, NCIRCUITS_TO_OBSERVE); memset(cbt, 0, sizeof(*cbt));
cbt->total_build_times = state->TotalBuildTimes;
for (line = state->BuildtimeHistogram; line; line = line->next) { for (line = state->BuildtimeHistogram; line; line = line->next) {
smartlist_t * args = smartlist_create(); smartlist_t * args = smartlist_create();
@ -206,9 +221,9 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
const char *ms_str = smartlist_get(args,0); const char *ms_str = smartlist_get(args,0);
const char *count_str = smartlist_get(args,1); const char *count_str = smartlist_get(args,1);
uint32_t count, i; uint32_t count, i;
uint16_t ms; build_time_t ms;
int ok; int ok;
ms = tor_parse_ulong(ms_str, 0, 0, UINT16_MAX, &ok, NULL); ms = tor_parse_ulong(ms_str, 0, 0, BUILD_TIME_MAX, &ok, NULL);
if (!ok) { if (!ok) {
*msg = tor_strdup("Unable to parse circuit build times: " *msg = tor_strdup("Unable to parse circuit build times: "
"Unparsable bin number"); "Unparsable bin number");
@ -233,10 +248,10 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
return (msg ? -1 : 0); return (msg ? -1 : 0);
} }
static void void
circuit_build_times_update_alpha(circuit_build_times_t *cbt) circuit_build_times_update_alpha(circuit_build_times_t *cbt)
{ {
uint16_t *x=cbt->circuit_build_times; build_time_t *x=cbt->circuit_build_times;
double a = 0; double a = 0;
int n=0,i=0; int n=0,i=0;
@ -248,6 +263,10 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
a += ln(x[i]); a += ln(x[i]);
n++; n++;
} }
if (n!=cbt->total_build_times) {
log_err(LD_CIRC, "Discrepency in build times count: %d vs %d", n,
cbt->total_build_times);
}
tor_assert(n==cbt->total_build_times); tor_assert(n==cbt->total_build_times);
a -= n*ln(cbt->Xm); a -= n*ln(cbt->Xm);
a = n/a; a = n/a;
@ -257,7 +276,7 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
/** /**
* This is the Pareto Quantile Function. It calculates the point x * This is the Pareto Quantile Function. It calculates the point x
* in the distribution such that F(x) < quantile (ie quantile*100% * in the distribution such that F(x) = quantile (ie quantile*100%
* of the mass of the density function is below x on the curve). * of the mass of the density function is below x on the curve).
* *
* We use it to calculate the timeout and also synthetic values of * We use it to calculate the timeout and also synthetic values of
@ -265,50 +284,92 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
* *
* See http://en.wikipedia.org/wiki/Quantile_function, * See http://en.wikipedia.org/wiki/Quantile_function,
* http://en.wikipedia.org/wiki/Inverse_transform_sampling and * http://en.wikipedia.org/wiki/Inverse_transform_sampling and
* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation * http://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_random_sample_from_Pareto_distribution
* That's right. I'll cite wikipedia all day long. * That's right. I'll cite wikipedia all day long.
*/ */
static double double
circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, circuit_build_times_calculate_timeout(circuit_build_times_t *cbt,
double quantile) double quantile)
{ {
return cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha); double ret = cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha);
if (ret > INT32_MAX) {
ret = INT32_MAX;
}
tor_assert(ret > 0);
return ret;
}
/* Pareto CDF */
double
circuit_build_times_cdf(circuit_build_times_t *cbt,
double x)
{
double ret = 1.0-pow(cbt->Xm/x,cbt->alpha);
tor_assert(0 <= ret && ret <= 1.0);
return ret;
}
build_time_t
circuit_build_times_generate_sample(circuit_build_times_t *cbt,
double q_lo, double q_hi)
{
uint32_t r = crypto_rand_int(UINT32_MAX-1);
double u = q_lo + ((q_hi-q_lo)*r)/(1.0*UINT32_MAX);
build_time_t ret;
tor_assert(0 <= u && u < 1.0);
ret = lround(circuit_build_times_calculate_timeout(cbt, u));
tor_assert(ret > 0);
return ret;
} }
static void static void
circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt) circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt)
{ {
/* Generate 0.8-1.0... */ /* Generate 0.8-1.0... */
uint64_t r = crypto_rand_uint64(UINT64_MAX-1); build_time_t gentime = circuit_build_times_generate_sample(cbt,
double u = BUILDTIMEOUT_QUANTILE_CUTOFF + BUILDTIMEOUT_QUANTILE_CUTOFF, 1.0);
((1.0-BUILDTIMEOUT_QUANTILE_CUTOFF)*r)/(1.0*UINT64_MAX);
long gentime = lround(circuit_build_times_calculate_timeout(cbt, u)); if (gentime < (build_time_t)get_options()->CircuitBuildTimeout*1000) {
if (gentime < get_options()->CircuitBuildTimeout*1000) {
log_warn(LD_CIRC, log_warn(LD_CIRC,
"Generated a synthetic timeout LESS than the current timeout: %ld vs %d", "Generated a synthetic timeout LESS than the current timeout: %u vs %d",
gentime, get_options()->CircuitBuildTimeout*1000); gentime, get_options()->CircuitBuildTimeout*1000);
tor_assert(gentime >= get_options()->CircuitBuildTimeout*1000); tor_assert(gentime >=
} else if (gentime > UINT16_MAX) { (build_time_t)get_options()->CircuitBuildTimeout*1000);
gentime = UINT16_MAX; } else if (gentime > BUILD_TIME_MAX) {
gentime = BUILD_TIME_MAX;
log_info(LD_CIRC, log_info(LD_CIRC,
"Generated a synthetic timeout LESS than the current timeout: %ld vs %d", "Generated a synthetic timeout larger than the max: %u", gentime);
gentime, get_options()->CircuitBuildTimeout*1000);
} else { } else {
log_info(LD_CIRC, "Generated synthetic time %ld for timeout", log_info(LD_CIRC, "Generated synthetic time %u for timeout", gentime);
gentime);
} }
circuit_build_times_add_time(cbt, gentime); circuit_build_times_add_time(cbt, gentime);
} }
void
circuit_build_times_initial_alpha(circuit_build_times_t *cbt,
double quantile, build_time_t timeout)
{
// Q(u) = Xm/((1-u)^(1/a))
// Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout
// CircBuildTimeout = Xm/((1-0.8))^(1/a))
// CircBuildTimeout = Xm*((1-0.8))^(-1/a))
// ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
// -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
cbt->alpha = ln(1.0-quantile)/(ln(cbt->Xm)-ln(timeout));
}
/** /**
* Store a timeout as a synthetic value * Store a timeout as a synthetic value
*/ */
void void
circuit_build_times_add_timeout(circuit_build_times_t *cbt) circuit_build_times_add_timeout(circuit_build_times_t *cbt)
{ {
/* XXX: If there are a ton of timeouts, we should reduce
* the circuit build timeout by like 2X or something...
* But then how do we differentiate between that and network
* failure? */
if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) { if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) {
/* Store a timeout before we have enough data as special */ /* Store a timeout before we have enough data as special */
cbt->pre_timeouts++; cbt->pre_timeouts++;
@ -316,17 +377,12 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt)
} }
/* Store a timeout as a random position on this curve */ /* Store a timeout as a random position on this curve */
if (cbt->pre_timeouts && get_options()->CircuitBuildTimeout != 60) { if (cbt->pre_timeouts) {
cbt->Xm = circuit_build_times_min(cbt); cbt->Xm = circuit_build_times_min(cbt);
// Use current timeout to get an estimate on alpha // Use current timeout to get an estimate on alpha
// Q(u) = Xm/((1-u)^(1/a)) circuit_build_times_initial_alpha(cbt,
// Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout 1.0-((double)cbt->pre_timeouts)/cbt->total_build_times,
// CircBuildTimeout = Xm/((1-0.8))^(1/a)) get_options()->CircuitBuildTimeout*1000);
// CircBuildTimeout = Xm*((1-0.8))^(-1/a))
// ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
// -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
cbt->alpha = -ln(1-BUILDTIMEOUT_QUANTILE_CUTOFF)/
(ln(get_options()->CircuitBuildTimeout)-ln(cbt->Xm));
while (cbt->pre_timeouts-- != 0) { while (cbt->pre_timeouts-- != 0) {
circuit_build_times_add_timeout_worker(cbt); circuit_build_times_add_timeout_worker(cbt);
} }

View File

@ -5207,7 +5207,7 @@ or_state_save(time_t now)
* to avoid redundant writes. */ * to avoid redundant writes. */
entry_guards_update_state(global_state); entry_guards_update_state(global_state);
rep_hist_update_state(global_state); rep_hist_update_state(global_state);
circuit_build_times_update_state(&circ_times, global_state); circuit_build_times_update_state(&circ_times, global_state, 0);
if (accounting_is_enabled(get_options())) if (accounting_is_enabled(get_options()))
accounting_run_housekeeping(now); accounting_run_housekeeping(now);

View File

@ -1884,15 +1884,6 @@ typedef struct crypt_path_t {
DH_KEY_LEN) DH_KEY_LEN)
#define ONIONSKIN_REPLY_LEN (DH_KEY_LEN+DIGEST_LEN) #define ONIONSKIN_REPLY_LEN (DH_KEY_LEN+DIGEST_LEN)
// XXX: Do we want to artifically tweak CircuitIdleTimeout and
// the number of circuits we build at a time if < MIN here?
#define MIN_CIRCUITS_TO_OBSERVE 1000
#define NCIRCUITS_TO_OBSERVE 10000 /* approx 3 weeks worth of circuits */
#define BUILDTIME_BIN_WIDTH 50
/* TODO: This should be moved to the consensus */
#define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8
/** Information used to build a circuit. */ /** Information used to build a circuit. */
typedef struct { typedef struct {
/** Intended length of the final circuit. */ /** Intended length of the final circuit. */
@ -2866,25 +2857,51 @@ void bridges_retry_all(void);
void entry_guards_free_all(void); void entry_guards_free_all(void);
/* Circuit Build Timeout "public" functions (I love C... No wait.) */ /* Circuit Build Timeout "public" functions and structures.
* (I love C... No wait.) */
// XXX: Do we want to artifically tweak CircuitIdleTimeout and
// the number of circuits we build at a time if < MIN here?
#define MIN_CIRCUITS_TO_OBSERVE 500
#define NCIRCUITS_TO_OBSERVE 5000 /* approx 1.5 weeks worth of circuits */
#define BUILDTIME_BIN_WIDTH 50
/* TODO: This should be moved to the consensus */
#define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8
typedef uint32_t build_time_t;
#define BUILD_TIME_MAX ((build_time_t)(INT32_MAX))
typedef struct { typedef struct {
// XXX: Make this a smartlist.. // XXX: Make this a smartlist..
uint16_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE];
int build_times_idx; int build_times_idx;
int total_build_times; int total_build_times;
int pre_timeouts; int pre_timeouts;
uint16_t Xm; build_time_t Xm;
double alpha; double alpha;
} circuit_build_times_t; } circuit_build_times_t;
extern circuit_build_times_t circ_times; extern circuit_build_times_t circ_times;
void circuit_build_times_update_state(circuit_build_times_t *cbt, void circuit_build_times_update_state(circuit_build_times_t *cbt,
or_state_t *state); or_state_t *state, int do_unit);
int circuit_build_times_parse_state(circuit_build_times_t *cbt, int circuit_build_times_parse_state(circuit_build_times_t *cbt,
or_state_t *state, char **msg); or_state_t *state, char **msg);
void circuit_build_times_add_timeout(circuit_build_times_t *cbt); void circuit_build_times_add_timeout(circuit_build_times_t *cbt);
void circuit_build_times_set_timeout(circuit_build_times_t *cbt); void circuit_build_times_set_timeout(circuit_build_times_t *cbt);
int circuit_build_times_add_time(circuit_build_times_t *cbt, long time); int circuit_build_times_add_time(circuit_build_times_t *cbt,
build_time_t time);
#ifdef CIRCUIT_PRIVATE
double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt,
double quantile);
build_time_t circuit_build_times_generate_sample(circuit_build_times_t *cbt,
double q_lo, double q_hi);
void circuit_build_times_initial_alpha(circuit_build_times_t *cbt,
double quantile, build_time_t time);
void circuit_build_times_update_alpha(circuit_build_times_t *cbt);
double circuit_build_times_cdf(circuit_build_times_t *cbt, double x);
#endif
/********************************* circuitlist.c ***********************/ /********************************* circuitlist.c ***********************/

View File

@ -37,6 +37,9 @@ const char tor_git_revision[] = "";
#define GEOIP_PRIVATE #define GEOIP_PRIVATE
#define MEMPOOL_PRIVATE #define MEMPOOL_PRIVATE
#define ROUTER_PRIVATE #define ROUTER_PRIVATE
#define CIRCUIT_PRIVATE
#include <math.h>
#include "or.h" #include "or.h"
#include "test.h" #include "test.h"
@ -3404,6 +3407,69 @@ test_dirutil_param_voting(void)
smartlist_free(vote3.net_params); smartlist_free(vote3.net_params);
smartlist_free(vote4.net_params); smartlist_free(vote4.net_params);
return;
}
static void
test_circuit_timeout(void)
{
/* Plan:
* 1. Generate 1000 samples
* 2. Estimate parameters
* 3. If difference, repeat
* 4. Save state
* 5. load state
* 6. Estimate parameters
* 7. compare differences
*/
circuit_build_times_t initial;
circuit_build_times_t estimate;
circuit_build_times_t final;
or_state_t state;
int i;
char *msg;
double timeout1, timeout2;
memset(&initial, 0, sizeof(circuit_build_times_t));
memset(&estimate, 0, sizeof(circuit_build_times_t));
memset(&final, 0, sizeof(circuit_build_times_t));
memset(&state, 0, sizeof(or_state_t));
#define timeout0 (30*1000.0)
initial.Xm = 750;
circuit_build_times_initial_alpha(&initial, BUILDTIMEOUT_QUANTILE_CUTOFF,
timeout0);
do {
int n = 0;
for (i=0; i < MIN_CIRCUITS_TO_OBSERVE; i++) {
if (circuit_build_times_add_time(&estimate,
circuit_build_times_generate_sample(&initial, 0, 1)) == 0) {
n++;
}
}
circuit_build_times_update_alpha(&estimate);
timeout1 = circuit_build_times_calculate_timeout(&estimate,
BUILDTIMEOUT_QUANTILE_CUTOFF);
log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout1, estimate.Xm);
} while (fabs(circuit_build_times_cdf(&initial, timeout0) -
circuit_build_times_cdf(&initial, timeout1)) > 0.05
/* 5% error */
&& estimate.total_build_times < NCIRCUITS_TO_OBSERVE);
test_assert(estimate.total_build_times < NCIRCUITS_TO_OBSERVE);
circuit_build_times_update_state(&estimate, &state, 1);
test_assert(circuit_build_times_parse_state(&final, &state, &msg) == 0);
circuit_build_times_update_alpha(&final);
timeout2 = circuit_build_times_calculate_timeout(&final,
BUILDTIMEOUT_QUANTILE_CUTOFF);
log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout2, final.Xm);
test_assert(fabs(circuit_build_times_cdf(&initial, timeout0) -
circuit_build_times_cdf(&initial, timeout2)) < 0.05);
done:
return;
} }
extern const char AUTHORITY_CERT_1[]; extern const char AUTHORITY_CERT_1[];
@ -4931,6 +4997,7 @@ static struct {
ENT(dirutil), ENT(dirutil),
SUBENT(dirutil, measured_bw), SUBENT(dirutil, measured_bw),
SUBENT(dirutil, param_voting), SUBENT(dirutil, param_voting),
ENT(circuit_timeout),
ENT(v3_networkstatus), ENT(v3_networkstatus),
ENT(policies), ENT(policies),
ENT(rend_fns), ENT(rend_fns),