Directories now also measure download times of network statuses.

This commit is contained in:
Karsten Loesing 2009-07-12 16:33:31 +02:00
parent 72c5a46b43
commit 3c05132575
7 changed files with 326 additions and 1 deletions

View File

@ -20,6 +20,7 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
Estimated shares of v2 and v3 requests are determined as averages, Estimated shares of v2 and v3 requests are determined as averages,
not at the end of a measurement period. Also, unresolved requests not at the end of a measurement period. Also, unresolved requests
are listed with country code '??'. are listed with country code '??'.
Directories now also measure download times of network statuses.
- Exit nodes can write statistics on the number of exit streams and - Exit nodes can write statistics on the number of exit streams and
transferred bytes per port to disk every 24 hours. To enable this, transferred bytes per port to disk every 24 hours. To enable this,
run configure with the --enable-exit-stats option, and set run configure with the --enable-exit-stats option, and set

View File

@ -2302,6 +2302,13 @@ connection_handle_write(connection_t *conn, int force)
/* else open, or closing */ /* else open, or closing */
result = flush_buf_tls(or_conn->tls, conn->outbuf, result = flush_buf_tls(or_conn->tls, conn->outbuf,
max_to_write, &conn->outbuf_flushlen); max_to_write, &conn->outbuf_flushlen);
#ifdef ENABLE_GEOIP_STATS
/* If we just flushed the last bytes, check if this tunneled dir
* request is done. */
if (buf_datalen(conn->outbuf) == 0 && conn->request_id)
geoip_change_dirreq_state(conn->request_id, REQUEST_TUNNELED,
OR_CONN_BUFFER_FLUSHED);
#endif
switch (result) { switch (result) {
CASE_TOR_TLS_ERROR_ANY: CASE_TOR_TLS_ERROR_ANY:
case TOR_TLS_CLOSE: case TOR_TLS_CLOSE:

View File

@ -2551,6 +2551,11 @@ connection_exit_begin_conn(cell_t *cell, circuit_t *circ)
log_debug(LD_EXIT,"Creating new exit connection."); log_debug(LD_EXIT,"Creating new exit connection.");
n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET); n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET);
#ifdef ENABLE_GEOIP_STATS
/* Remember the tunneled request ID in the new edge connection, so that
* we can measure download times. */
TO_CONN(n_stream)->request_id = circ->request_id;
#endif
n_stream->_base.purpose = EXIT_PURPOSE_CONNECT; n_stream->_base.purpose = EXIT_PURPOSE_CONNECT;
n_stream->stream_id = rh.stream_id; n_stream->stream_id = rh.stream_id;
@ -2787,6 +2792,11 @@ connection_exit_connect_dir(edge_connection_t *exitconn)
dirconn->_base.purpose = DIR_PURPOSE_SERVER; dirconn->_base.purpose = DIR_PURPOSE_SERVER;
dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT; dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT;
#ifdef ENABLE_GEOIP_STATS
/* Note that the new dir conn belongs to the same tunneled request as
* the edge conn, so that we can measure download times. */
TO_CONN(dirconn)->request_id = TO_CONN(exitconn)->request_id;
#endif
connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn)); connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn));
if (connection_add(TO_CONN(exitconn))<0) { if (connection_add(TO_CONN(exitconn))<0) {

View File

@ -2565,9 +2565,18 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
#ifdef ENABLE_GEOIP_STATS #ifdef ENABLE_GEOIP_STATS
{ {
struct in_addr in; struct in_addr in;
if (tor_inet_aton((TO_CONN(conn))->address, &in)) if (tor_inet_aton((TO_CONN(conn))->address, &in)) {
geoip_note_client_seen(act, ntohl(in.s_addr), time(NULL)); geoip_note_client_seen(act, ntohl(in.s_addr), time(NULL));
geoip_note_ns_response(act, GEOIP_SUCCESS); geoip_note_ns_response(act, GEOIP_SUCCESS);
/* Note that a request for a network status has started, so that we
* can measure the download time later on. */
if (TO_CONN(conn)->request_id)
geoip_start_dirreq(TO_CONN(conn)->request_id, dlen, act,
REQUEST_TUNNELED);
else
geoip_start_dirreq(TO_CONN(conn)->global_identifier, dlen, act,
REQUEST_DIRECT);
}
} }
#endif #endif
@ -3201,6 +3210,17 @@ connection_dir_finished_flushing(dir_connection_t *conn)
tor_assert(conn); tor_assert(conn);
tor_assert(conn->_base.type == CONN_TYPE_DIR); tor_assert(conn->_base.type == CONN_TYPE_DIR);
#ifdef ENABLE_GEOIP_STATS
/* Note that we have finished writing the directory response. For direct
* connections this means we're done, for tunneled connections it's only
* an intermediate step. */
if (TO_CONN(conn)->request_id)
geoip_change_dirreq_state(TO_CONN(conn)->request_id, REQUEST_TUNNELED,
FLUSHING_DIR_CONN_FINISHED);
else
geoip_change_dirreq_state(TO_CONN(conn)->global_identifier,
REQUEST_DIRECT, FLUSHING_DIR_CONN_FINISHED);
#endif
switch (conn->_base.state) { switch (conn->_base.state) {
case DIR_CONN_STATE_CLIENT_SENDING: case DIR_CONN_STATE_CLIENT_SENDING:
log_debug(LD_DIR,"client finished sending command."); log_debug(LD_DIR,"client finished sending command.");

View File

@ -570,6 +570,204 @@ _c_hist_compare(const void **_a, const void **_b)
return strcmp(a->country, b->country); return strcmp(a->country, b->country);
} }
/** Timeout, in seconds, applied to directory requests that are still
 * incomplete when the statistics are written: requests that have been
 * running for longer than this are counted as failed ("timeout"), all
 * other incomplete requests as still running. The value is compared
 * against a microsecond time difference divided by 1000000. */
#define DIRREQ_TIMEOUT (10*60)
/** Entry in a map from either conn->global_identifier for direct requests
 * or a unique circuit identifier for tunneled requests to request time,
 * response size, and completion time of a network status request. Used to
 * measure download times of requests to derive average client
 * bandwidths. */
typedef struct dirreqdlmap_entry_t {
  directory_request_state_t state; /**< State of this directory request. */
  /** Unique identifier for this network status request; this is either the
   * conn->global_identifier of the dir conn (direct request) or a new
   * locally unique identifier of a circuit (tunneled request). This ID is
   * only unique among other direct or tunneled requests, respectively. */
  uint64_t request_id;
  /** Is this a direct or a tunneled request? */
  directory_request_type_t type;
  /** Is this request complete? Declared unsigned because a signed one-bit
   * bit-field cannot portably represent the value 1. */
  unsigned int completed:1;
  geoip_client_action_t action; /**< Is this a v2 or v3 request? */
  /** When did we receive the request and start sending the response? */
  struct timeval request_time;
  size_t response_size; /**< What is the size of the response in bytes? */
  struct timeval completion_time; /**< When did the request succeed? */
} dirreqdlmap_entry_t;
/** Map of all directory requests asking for v2 or v3 network statuses in
 * the current geoip-stats interval. Keys are strings starting with either
 * "dir" for direct requests or "tun" for tunneled requests, followed by
 * a unique uint64_t identifier represented as decimal string. Values are
 * pointers to <b>dirreqdlmap_entry_t</b>. The map is created lazily on
 * first access and is NULL until then. */
static strmap_t *dirreqdlmap = NULL;
/** Helper: Put <b>entry</b> into the map of directory requests using
 * <b>type</b> and <b>request_id</b> as key parts. The map is created if
 * it does not exist yet.
 *
 * This function takes ownership of <b>entry</b> in every case: on success
 * the map holds it; if there is already an entry for that key, a BUG
 * warning is logged, the existing entry is kept, and <b>entry</b> is freed
 * so that it does not leak. Callers must not use <b>entry</b> after this
 * call. */
static void
_dirreqdlmap_put(dirreqdlmap_entry_t *entry,
                 directory_request_type_t type, uint64_t request_id)
{
  char key[3+20+1]; /* dir|tun + 18446744073709551615 + \0 */
  if (!dirreqdlmap)
    dirreqdlmap = strmap_new();
  tor_snprintf(key, sizeof(key), "%s"U64_FORMAT,
               type == REQUEST_TUNNELED ? "tun" : "dir",
               U64_PRINTF_ARG(request_id));
  if (strmap_get(dirreqdlmap, key)) {
    log_warn(LD_BUG, "Error when putting directory request into local "
             "map. There is already an entry for the same identifier.");
    /* Nobody else holds a reference to the rejected entry; release it
     * here instead of silently dropping the allocation. */
    tor_free(entry);
    return;
  }
  strmap_set(dirreqdlmap, key, entry);
}
/** Helper: Return the entry stored in the map of directory requests under
 * the key built from <b>type</b> and <b>request_id</b>, or NULL if there
 * is no such entry. The map itself is created first if it does not exist
 * yet. */
static dirreqdlmap_entry_t *
_dirreqdlmap_get(directory_request_type_t type, uint64_t request_id)
{
  /* Three-character prefix, up to 20 decimal digits, terminating NUL. */
  char lookup_key[3+20+1];
  const char *prefix = (type == REQUEST_TUNNELED) ? "tun" : "dir";

  if (dirreqdlmap == NULL)
    dirreqdlmap = strmap_new();

  tor_snprintf(lookup_key, sizeof(lookup_key), "%s"U64_FORMAT,
               prefix, U64_PRINTF_ARG(request_id));
  return strmap_get(dirreqdlmap, lookup_key);
}
/** Record the start of a directory request for a network status.
 * <b>request_id</b> identifies the request, <b>response_size</b> is the
 * size of the response, <b>action</b> tells whether this is a v2 or v3
 * request, and <b>type</b> tells whether the request is direct or
 * tunneled. The current time is stored as the request time, and the new
 * entry is handed to the map of directory requests. */
void
geoip_start_dirreq(uint64_t request_id, size_t response_size,
                   geoip_client_action_t action,
                   directory_request_type_t type)
{
  /* Zero-allocated, so all fields not set below start out as 0. */
  dirreqdlmap_entry_t *new_req = tor_malloc_zero(sizeof(dirreqdlmap_entry_t));

  new_req->type = type;
  new_req->action = action;
  new_req->response_size = response_size;
  new_req->request_id = request_id;
  tor_gettimeofday(&new_req->request_time);

  _dirreqdlmap_put(new_req, type, request_id);
}
/** Advance the directory request identified by <b>type</b> (direct or
 * tunneled) and <b>request_id</b> to <b>new_state</b>. The transition is
 * only performed when <b>new_state</b> is the direct successor of the
 * request's current state; in every other case (no entry for these key
 * parts, <b>new_state</b> being the initial state, or a skipped or
 * repeated state) nothing happens.
 *
 * A direct request is marked as completed when it reaches
 * FLUSHING_DIR_CONN_FINISHED, a tunneled request when it reaches
 * OR_CONN_BUFFER_FLUSHED; the completion time is recorded at that
 * moment. */
void
geoip_change_dirreq_state(uint64_t request_id,
                          directory_request_type_t type,
                          directory_request_state_t new_state)
{
  int finishes_request;
  dirreqdlmap_entry_t *req = _dirreqdlmap_get(type, request_id);

  if (!req ||
      new_state == REQUEST_IS_FOR_NETWORK_STATUS ||
      new_state - 1 != req->state)
    return;

  req->state = new_state;

  /* Which state ends the measurement depends on the request type. */
  if (type == REQUEST_DIRECT)
    finishes_request = (new_state == FLUSHING_DIR_CONN_FINISHED);
  else if (type == REQUEST_TUNNELED)
    finishes_request = (new_state == OR_CONN_BUFFER_FLUSHED);
  else
    finishes_request = 0;

  if (finishes_request) {
    tor_gettimeofday(&req->completion_time);
    req->completed = 1;
  }
}
#ifdef ENABLE_GEOIP_STATS
/** Minimum number of completed requests required before percentile
 * statistics are appended to the history string. */
#define MIN_DIR_REQ_RESPONSES 16

/** Return a newly allocated comma-separated string containing statistics
 * on network status downloads matching <b>action</b> (v2 or v3) and
 * <b>type</b> (direct or tunneled). The string contains the number of
 * completed requests, timeouts, and still running requests. If at least
 * MIN_DIR_REQ_RESPONSES requests completed, it also contains the minimum,
 * deciles, quartiles, median, and maximum of the observed download rates
 * in bytes per second.
 *
 * All matching entries are removed from the map of directory requests and
 * freed, whether they completed or not.
 *
 * Return NULL if there is no request map yet, if <b>action</b> is not a
 * network status action, or if formatting the string fails. The caller
 * owns the returned string and must free it. */
static char *
geoip_get_dirreqdl_history(geoip_client_action_t action,
                           directory_request_type_t type)
{
  char *result = NULL;
  smartlist_t *dirreqdl_times = NULL;
  uint32_t complete = 0, timeouts = 0, running = 0;
  int bufsize = 1024, written;
  struct timeval now;

  if (!dirreqdlmap)
    return NULL;
  if (action != GEOIP_CLIENT_NETWORKSTATUS &&
      action != GEOIP_CLIENT_NETWORKSTATUS_V2)
    return NULL;

  tor_gettimeofday(&now);
  dirreqdl_times = smartlist_create();
  STRMAP_FOREACH_MODIFY(dirreqdlmap, key, dirreqdlmap_entry_t *, ent) {
    if (ent->action == action && type == ent->type) {
      if (ent->completed) {
        uint32_t *bytes_per_second = tor_malloc_zero(sizeof(uint32_t));
        long usec = tv_udiff(&ent->request_time, &ent->completion_time);
        uint64_t rate;
        if (usec <= 0)
          usec = 1; /* Avoid DIV/0; "instant" answers are impossible
                     * anyway by law of nature or something.. */
        /* Compute in 64 bits: 1000000 * response_size overflows a 32-bit
         * size_t for responses larger than a few kilobytes. */
        rate = ((uint64_t)ent->response_size * 1000000) / (uint64_t)usec;
        *bytes_per_second = rate > UINT32_MAX ? UINT32_MAX : (uint32_t)rate;
        smartlist_add(dirreqdl_times, bytes_per_second);
        complete++;
      } else {
        if (tv_udiff(&ent->request_time, &now) / 1000000 > DIRREQ_TIMEOUT)
          timeouts++;
        else
          running++;
      }
      tor_free(ent);
      MAP_DEL_CURRENT(key);
    }
  } STRMAP_FOREACH_END;

  result = tor_malloc_zero(bufsize);
  written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
                         "running=%u", (unsigned)complete,
                         (unsigned)timeouts, (unsigned)running);
  if (written < 0) {
    tor_free(result); /* also resets result to NULL */
    goto done;
  }

  if (complete >= MIN_DIR_REQ_RESPONSES) {
    uint32_t *dltimes = tor_malloc(sizeof(uint32_t) * complete);
    int i = 0;
    SMARTLIST_FOREACH(dirreqdl_times, uint32_t *, dlt, dltimes[i++] = *dlt);
    median_uint32(dltimes, complete); /* sort */
    /* With complete >= 16 every index below is >= 0. */
    written = tor_snprintf(result + written, bufsize - written,
                           ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
                           "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
                           (unsigned)dltimes[0],
                           (unsigned)dltimes[1*complete/10-1],
                           (unsigned)dltimes[2*complete/10-1],
                           (unsigned)dltimes[1*complete/4-1],
                           (unsigned)dltimes[3*complete/10-1],
                           (unsigned)dltimes[4*complete/10-1],
                           (unsigned)dltimes[5*complete/10-1],
                           (unsigned)dltimes[6*complete/10-1],
                           (unsigned)dltimes[7*complete/10-1],
                           (unsigned)dltimes[3*complete/4-1],
                           (unsigned)dltimes[8*complete/10-1],
                           (unsigned)dltimes[9*complete/10-1],
                           (unsigned)dltimes[complete-1]);
    tor_free(dltimes);
    if (written < 0)
      tor_free(result); /* do not leak the partial string */
  }

 done:
  /* The list owns its elements; release them on every path, including
   * the one where too few requests completed to report percentiles. */
  SMARTLIST_FOREACH(dirreqdl_times, uint32_t *, dlt, tor_free(dlt));
  smartlist_free(dirreqdl_times);
  return result;
}
#endif
/** How long do we have to have observed per-country request history before we /** How long do we have to have observed per-country request history before we
* are willing to talk about it? */ * are willing to talk about it? */
#define GEOIP_MIN_OBSERVATION_TIME (12*60*60) #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
@ -785,6 +983,23 @@ dump_geoip_stats(void)
goto done; goto done;
} }
data_v2 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
REQUEST_DIRECT);
data_v3 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS,
REQUEST_DIRECT);
if (fprintf(out, "ns-direct-dl %s\nns-v2-direct-dl %s\n",
data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
goto done;
tor_free(data_v2);
tor_free(data_v3);
data_v2 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
REQUEST_TUNNELED);
data_v3 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS,
REQUEST_TUNNELED);
if (fprintf(out, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n",
data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
goto done;
finish_writing_to_file(open_file); finish_writing_to_file(open_file);
open_file = NULL; open_file = NULL;
done: done:

View File

@ -970,6 +970,10 @@ typedef struct connection_t {
* to the evdns_server_port is uses to listen to and answer connections. */ * to the evdns_server_port is uses to listen to and answer connections. */
struct evdns_server_port *dns_server_port; struct evdns_server_port *dns_server_port;
#ifdef ENABLE_GEOIP_STATS
/** Unique ID for measuring tunneled network status requests. */
uint64_t request_id;
#endif
} connection_t; } connection_t;
/** Stores flags and information related to the portion of a v2 Tor OR /** Stores flags and information related to the portion of a v2 Tor OR
@ -1956,6 +1960,10 @@ typedef struct circuit_t {
* linked to an OR connection. */ * linked to an OR connection. */
struct circuit_t *prev_active_on_n_conn; struct circuit_t *prev_active_on_n_conn;
struct circuit_t *next; /**< Next circuit in linked list of all circuits. */ struct circuit_t *next; /**< Next circuit in linked list of all circuits. */
#ifdef ENABLE_GEOIP_STATS
/** Unique ID for measuring tunneled network status requests. */
uint64_t request_id;
#endif
} circuit_t; } circuit_t;
/** Largest number of relay_early cells that we can send on a given /** Largest number of relay_early cells that we can send on a given
@ -3672,6 +3680,42 @@ int getinfo_helper_geoip(control_connection_t *control_conn,
const char *question, char **answer); const char *question, char **answer);
void geoip_free_all(void); void geoip_free_all(void);
/** Directory requests that we are measuring can be either direct or
 * tunneled. The type selects the key prefix ("dir" or "tun") under which
 * a request is stored and which state completes the request. */
typedef enum {
  /** Request made directly on a directory connection. */
  REQUEST_DIRECT = 0,
  /** Request tunneled over a circuit. */
  REQUEST_TUNNELED = 1
} directory_request_type_t;
/** States that a measured directory request (direct or tunneled) passes
 * through while we determine its network status download time. The
 * values are consecutive, and a request only ever advances from one state
 * to the next higher one. */
typedef enum {
  /** Initial state of every measured request, direct or tunneled: we
   * found that the client asks for a network status. */
  REQUEST_IS_FOR_NETWORK_STATUS = 0,
  /** The network status has been written completely to the directory
   * connection. Reached by both direct and tunneled requests; a direct
   * request is complete at this point. */
  FLUSHING_DIR_CONN_FINISHED = 1,
  /** Tunneled requests only: the END cell was sent to the circuit that
   * initiated the request. */
  END_CELL_SENT = 2,
  /** Tunneled requests only: the last cell was flushed from the queue of
   * the initiating circuit to the outbuf of the OR connection. */
  CIRC_QUEUE_FLUSHED = 3,
  /** Tunneled requests only: the last byte was flushed from the buffer of
   * the OR connection belonging to the initiating circuit; a tunneled
   * request is complete at this point. */
  OR_CONN_BUFFER_FLUSHED = 4
} directory_request_state_t;
/* Start measuring a network status request identified by request_id and
 * type; response_size is the size of the response and action tells
 * whether it is a v2 or v3 request. */
void geoip_start_dirreq(uint64_t request_id, size_t response_size,
geoip_client_action_t action,
directory_request_type_t type);
/* Advance the measured request identified by request_id and type to
 * new_state; does nothing if no such request is known or if new_state is
 * not the direct successor of its current state. */
void geoip_change_dirreq_state(uint64_t request_id,
directory_request_type_t type,
directory_request_state_t new_state);
/********************************* hibernate.c **********************/ /********************************* hibernate.c **********************/
int accounting_parse_options(or_options_t *options, int validate_only); int accounting_parse_options(or_options_t *options, int validate_only);

View File

@ -532,6 +532,14 @@ relay_send_command_from_edge(uint16_t stream_id, circuit_t *circ,
log_debug(LD_OR,"delivering %d cell %s.", relay_command, log_debug(LD_OR,"delivering %d cell %s.", relay_command,
cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward"); cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
#ifdef ENABLE_GEOIP_STATS
/* If we are sending an END cell and this circuit is used for a tunneled
* directory request, advance its state. */
if (relay_command == RELAY_COMMAND_END && circ->request_id)
geoip_change_dirreq_state(circ->request_id, REQUEST_TUNNELED,
END_CELL_SENT);
#endif
if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) { if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) {
/* if we're using relaybandwidthrate, this conn wants priority */ /* if we're using relaybandwidthrate, this conn wants priority */
circ->n_conn->client_used = approx_time(); circ->n_conn->client_used = approx_time();
@ -1032,6 +1040,18 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
"Begin cell for known stream. Dropping."); "Begin cell for known stream. Dropping.");
return 0; return 0;
} }
#ifdef ENABLE_GEOIP_STATS
if (rh.command == RELAY_COMMAND_BEGIN_DIR) {
/* Assign this circuit and its app-ward OR connection a unique ID,
* so that we can measure download times. The local edge and dir
* connection will be assigned the same ID when they are created
* and linked. */
static uint64_t next_id = 0;
circ->request_id = ++next_id;
TO_CONN(TO_OR_CIRCUIT(circ)->p_conn)->request_id = circ->request_id;
}
#endif
return connection_exit_begin_conn(cell, circ); return connection_exit_begin_conn(cell, circ);
case RELAY_COMMAND_DATA: case RELAY_COMMAND_DATA:
++stats_n_data_cells_received; ++stats_n_data_cells_received;
@ -1821,6 +1841,14 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max,
orcirc->processed_cells++; orcirc->processed_cells++;
} }
#endif #endif
#ifdef ENABLE_GEOIP_STATS
/* If we just flushed our queue and this circuit is used for a
* tunneled directory request, possibly advance its state. */
if (queue->n == 0 && TO_CONN(conn)->request_id)
geoip_change_dirreq_state(TO_CONN(conn)->request_id,
REQUEST_TUNNELED, CIRC_QUEUE_FLUSHED);
#endif
connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn)); connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn));
packed_cell_free(cell); packed_cell_free(cell);