From 3c051325758c0aeeeb44054715e16d8b8717948c Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Sun, 12 Jul 2009 16:33:31 +0200 Subject: [PATCH 1/6] Directories now also measure download times of network statuses. --- ChangeLog | 1 + src/or/connection.c | 7 ++ src/or/connection_edge.c | 10 ++ src/or/directory.c | 22 +++- src/or/geoip.c | 215 +++++++++++++++++++++++++++++++++++++++ src/or/or.h | 44 ++++++++ src/or/relay.c | 28 +++++ 7 files changed, 326 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 30a776c29e..6b4f651828 100644 --- a/ChangeLog +++ b/ChangeLog @@ -20,6 +20,7 @@ Changes in version 0.2.2.1-alpha - 2009-??-?? Estimated shares of v2 and v3 requests are determined as averages, not at the end of a measurement period. Also, unresolved requests are listed with country code '??'. + Directories now also measure download times of network statuses. - Exit nodes can write statistics on the number of exit streams and transferred bytes per port to disk every 24 hours. To enable this, run configure with the --enable-exit-stats option, and set diff --git a/src/or/connection.c b/src/or/connection.c index 0897eb4a5e..309421051a 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -2302,6 +2302,13 @@ connection_handle_write(connection_t *conn, int force) /* else open, or closing */ result = flush_buf_tls(or_conn->tls, conn->outbuf, max_to_write, &conn->outbuf_flushlen); +#ifdef ENABLE_GEOIP_STATS + /* If we just flushed the last bytes, check if this tunneled dir + * request is done. */ + if (buf_datalen(conn->outbuf) == 0 && conn->request_id) + geoip_change_dirreq_state(conn->request_id, REQUEST_TUNNELED, + OR_CONN_BUFFER_FLUSHED); +#endif switch (result) { CASE_TOR_TLS_ERROR_ANY: case TOR_TLS_CLOSE: diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c index 560a2433d7..f32563bcdb 100644 --- a/src/or/connection_edge.c +++ b/src/or/connection_edge.c @@ -2551,6 +2551,11 @@ connection_exit_begin_conn(cell_t *cell, circuit_t *circ) log_debug(LD_EXIT,"Creating new exit connection."); n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET); +#ifdef ENABLE_GEOIP_STATS + /* Remember the tunneled request ID in the new edge connection, so that + * we can measure download times. */ + TO_CONN(n_stream)->request_id = circ->request_id; +#endif n_stream->_base.purpose = EXIT_PURPOSE_CONNECT; n_stream->stream_id = rh.stream_id; @@ -2787,6 +2792,11 @@ connection_exit_connect_dir(edge_connection_t *exitconn) dirconn->_base.purpose = DIR_PURPOSE_SERVER; dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT; +#ifdef ENABLE_GEOIP_STATS + /* Note that the new dir conn belongs to the same tunneled request as + * the edge conn, so that we can measure download times. */ + TO_CONN(dirconn)->request_id = TO_CONN(exitconn)->request_id; +#endif connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn)); if (connection_add(TO_CONN(exitconn))<0) { diff --git a/src/or/directory.c b/src/or/directory.c index cf9f5543d7..e5230c2cbd 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -2565,9 +2565,18 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers, #ifdef ENABLE_GEOIP_STATS { struct in_addr in; - if (tor_inet_aton((TO_CONN(conn))->address, &in)) + if (tor_inet_aton((TO_CONN(conn))->address, &in)) { geoip_note_client_seen(act, ntohl(in.s_addr), time(NULL)); geoip_note_ns_response(act, GEOIP_SUCCESS); + /* Note that a request for a network status has started, so that we + * can measure the download time later on. */ + if (TO_CONN(conn)->request_id) + geoip_start_dirreq(TO_CONN(conn)->request_id, dlen, act, + REQUEST_TUNNELED); + else + geoip_start_dirreq(TO_CONN(conn)->global_identifier, dlen, act, + REQUEST_DIRECT); + } } #endif @@ -3201,6 +3210,17 @@ connection_dir_finished_flushing(dir_connection_t *conn) tor_assert(conn); tor_assert(conn->_base.type == CONN_TYPE_DIR); +#ifdef ENABLE_GEOIP_STATS + /* Note that we have finished writing the directory response. For direct + * connections this means we're done, for tunneled connections its only + * an intermediate step. */ + if (TO_CONN(conn)->request_id) + geoip_change_dirreq_state(TO_CONN(conn)->request_id, REQUEST_TUNNELED, + FLUSHING_DIR_CONN_FINISHED); + else + geoip_change_dirreq_state(TO_CONN(conn)->global_identifier, + REQUEST_DIRECT, FLUSHING_DIR_CONN_FINISHED); +#endif switch (conn->_base.state) { case DIR_CONN_STATE_CLIENT_SENDING: log_debug(LD_DIR,"client finished sending command."); diff --git a/src/or/geoip.c b/src/or/geoip.c index 14b5f66263..a7e46c0b8f 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -570,6 +570,204 @@ _c_hist_compare(const void **_a, const void **_b) return strcmp(a->country, b->country); } +/** When there are incomplete directory requests at the end of a 24-hour + * period, consider those requests running for longer than this timeout as + * failed, the others as still running. */ +#define DIRREQ_TIMEOUT (10*60) + +/** Entry in a map from either conn->global_identifier for direct requests + * or a unique circuit identifier for tunneled requests to request time, + * response size, and completion time of a network status request. Used to + * measure download times of requests to derive average client + * bandwidths. */ +typedef struct dirreqdlmap_entry_t { + directory_request_state_t state; /**< State of this directory request. */ + /** Unique identifier for this network status request; this is either the + * conn->global_identifier of the dir conn (direct request) or a new + * locally unique identifier of a circuit (tunneled request). This ID is + * only unique among other direct or tunneled requests, respectively. */ + uint64_t request_id; + /** Is this a direct or a tunneled request? */ + directory_request_type_t type; + int completed:1; /**< Is this request complete? */ + geoip_client_action_t action; /**< Is this a v2 or v3 request? */ + /** When did we receive the request and started sending the response? */ + struct timeval request_time; + size_t response_size; /**< What is the size of the response in bytes? */ + struct timeval completion_time; /**< When did the request succeed? */ +} dirreqdlmap_entry_t; + +/** Map of all directory requests asking for v2 or v3 network statuses in + * the current geoip-stats interval. Keys are strings starting with either + * "dir" for direct requests or "tun" for tunneled requests, followed by + * a unique uint64_t identifier represented as decimal string. Values are + * of type *dirreqdlmap_entry_t. */ +static strmap_t *dirreqdlmap = NULL; + +/** Helper: Put entry into map of directory requests using + * tunneled and request_id as key parts. If there is + * already an entry for that key, print out a BUG warning and return. */ +static void +_dirreqdlmap_put(dirreqdlmap_entry_t *entry, + directory_request_type_t type, uint64_t request_id) +{ + char key[3+20+1]; /* dir|tun + -9223372036854775808 + \0 */ + dirreqdlmap_entry_t *ent; + if (!dirreqdlmap) + dirreqdlmap = strmap_new(); + tor_snprintf(key, sizeof(key), "%s"U64_FORMAT, + type == REQUEST_TUNNELED ? "tun" : "dir", + U64_PRINTF_ARG(request_id)); + ent = strmap_get(dirreqdlmap, key); + if (ent) { + log_warn(LD_BUG, "Error when putting directory request into local " + "map. There is already an entry for the same identifier."); + return; + } + strmap_set(dirreqdlmap, key, entry); +} + +/** Helper: Look up and return an entry in the map of directory requests + * using tunneled and request_id as key parts. If there + * is no such entry, return NULL. */ +static dirreqdlmap_entry_t * +_dirreqdlmap_get(directory_request_type_t type, uint64_t request_id) +{ + char key[3+20+1]; /* dir|tun + -9223372036854775808 + \0 */ + if (!dirreqdlmap) + dirreqdlmap = strmap_new(); + tor_snprintf(key, sizeof(key), "%s"U64_FORMAT, + type == REQUEST_TUNNELED ? "tun" : "dir", + U64_PRINTF_ARG(request_id)); + return strmap_get(dirreqdlmap, key); +} + +/** Note that an either direct or tunneled (see type) directory + * request for a network status with unique ID request_id of size + * response_size and action action (either v2 or v3) has + * started. */ +void +geoip_start_dirreq(uint64_t request_id, size_t response_size, + geoip_client_action_t action, + directory_request_type_t type) +{ + dirreqdlmap_entry_t *ent = tor_malloc_zero(sizeof(dirreqdlmap_entry_t)); + ent->request_id = request_id; + tor_gettimeofday(&ent->request_time); + ent->response_size = response_size; + ent->action = action; + ent->type = type; + _dirreqdlmap_put(ent, type, request_id); +} + +/** Change the state of the either direct or tunneled (see type) + * directory request with request_id to new_state and + * possibly mark it as completed. If no entry can be found for the given + * key parts (e.g., if this is a directory request that we are not + * measuring, or one that was started in the previous measurement period), + * or if the state cannot be advanced to new_state, do nothing. */ +void +geoip_change_dirreq_state(uint64_t request_id, + directory_request_type_t type, + directory_request_state_t new_state) +{ + dirreqdlmap_entry_t *ent = _dirreqdlmap_get(type, request_id); + if (!ent) + return; + if (new_state == REQUEST_IS_FOR_NETWORK_STATUS) + return; + if (new_state - 1 != ent->state) + return; + ent->state = new_state; + if ((type == REQUEST_DIRECT && new_state == FLUSHING_DIR_CONN_FINISHED) || + (type == REQUEST_TUNNELED && new_state == OR_CONN_BUFFER_FLUSHED)) { + tor_gettimeofday(&ent->completion_time); + ent->completed = 1; + } +} + +#ifdef ENABLE_GEOIP_STATS +/** Return a newly allocated comma-separated string containing statistics + * on network status downloads. The string contains the number of completed + * requests, timeouts, and still running requests as well as the download + * times by deciles and quartiles. Return NULL if we have not observed + * requests for long enough. */ +static char * +geoip_get_dirreqdl_history(geoip_client_action_t action, + directory_request_type_t type) +{ + char *result = NULL; + smartlist_t *dirreqdl_times = NULL; + uint32_t complete = 0, timeouts = 0, running = 0; + int i = 0, bufsize = 1024, written; + struct timeval now; + tor_gettimeofday(&now); + if (!dirreqdlmap) + return NULL; + if (action != GEOIP_CLIENT_NETWORKSTATUS && + action != GEOIP_CLIENT_NETWORKSTATUS_V2) + return NULL; + dirreqdl_times = smartlist_create(); + STRMAP_FOREACH_MODIFY(dirreqdlmap, key, dirreqdlmap_entry_t *, ent) { + if (ent->action == action && type == ent->type) { + if (ent->completed) { + uint32_t *bytes_per_second = tor_malloc_zero(sizeof(uint32_t)); + uint32_t time_diff = (uint32_t) tv_udiff(&ent->request_time, + &ent->completion_time); + if (time_diff == 0) + time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible + * anyway by law of nature or something.. */ + *bytes_per_second = 1000000 * ent->response_size / time_diff; + smartlist_add(dirreqdl_times, bytes_per_second); + complete++; + } else { + if (tv_udiff(&ent->request_time, &now) / 1000000 > DIRREQ_TIMEOUT) + timeouts++; + else + running++; + } + tor_free(ent); + MAP_DEL_CURRENT(key); + } + } STRMAP_FOREACH_END; + result = tor_malloc_zero(bufsize); + written = tor_snprintf(result, bufsize, "complete=%d,timeout=%d," + "running=%d", complete, timeouts, running); + if (written < 0) + return NULL; +#define MIN_DIR_REQ_RESPONSES 16 + if (complete >= MIN_DIR_REQ_RESPONSES) { + uint32_t *dltimes = tor_malloc(sizeof(uint32_t) * complete); + SMARTLIST_FOREACH(dirreqdl_times, uint32_t *, dlt, { + dltimes[i++] = *dlt; + tor_free(dlt); + }); + median_uint32(dltimes, complete); /* sort */ + written = tor_snprintf(result + written, bufsize - written, + ",min=%d,d1=%d,d2=%d,q1=%d,d3=%d,d4=%d,md=%d," + "d6=%d,d7=%d,q3=%d,d8=%d,d9=%d,max=%d", + dltimes[0], + dltimes[1*complete/10-1], + dltimes[2*complete/10-1], + dltimes[1*complete/4-1], + dltimes[3*complete/10-1], + dltimes[4*complete/10-1], + dltimes[5*complete/10-1], + dltimes[6*complete/10-1], + dltimes[7*complete/10-1], + dltimes[3*complete/4-1], + dltimes[8*complete/10-1], + dltimes[9*complete/10-1], + dltimes[complete-1]); + tor_free(dltimes); + } + if (written < 0) + result = NULL; + smartlist_free(dirreqdl_times); + return result; +} +#endif + /** How long do we have to have observed per-country request history before we * are willing to talk about it? */ #define GEOIP_MIN_OBSERVATION_TIME (12*60*60) @@ -785,6 +983,23 @@ dump_geoip_stats(void) goto done; } + data_v2 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS_V2, + REQUEST_DIRECT); + data_v3 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS, + REQUEST_DIRECT); + if (fprintf(out, "ns-direct-dl %s\nns-v2-direct-dl %s\n", + data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0) + goto done; + tor_free(data_v2); + tor_free(data_v3); + data_v2 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS_V2, + REQUEST_TUNNELED); + data_v3 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS, + REQUEST_TUNNELED); + if (fprintf(out, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n", + data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0) + goto done; + finish_writing_to_file(open_file); open_file = NULL; done: diff --git a/src/or/or.h b/src/or/or.h index c18ef15faf..035d4edc0a 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -970,6 +970,10 @@ typedef struct connection_t { * to the evdns_server_port is uses to listen to and answer connections. */ struct evdns_server_port *dns_server_port; +#ifdef ENABLE_GEOIP_STATS + /** Unique ID for measuring tunneled network status requests. */ + uint64_t request_id; +#endif } connection_t; /** Stores flags and information related to the portion of a v2 Tor OR @@ -1956,6 +1960,10 @@ typedef struct circuit_t { * linked to an OR connection. */ struct circuit_t *prev_active_on_n_conn; struct circuit_t *next; /**< Next circuit in linked list of all circuits. */ +#ifdef ENABLE_GEOIP_STATS + /** Unique ID for measuring tunneled network status requests. */ + uint64_t request_id; +#endif } circuit_t; /** Largest number of relay_early cells that we can send on a given @@ -3672,6 +3680,42 @@ int getinfo_helper_geoip(control_connection_t *control_conn, const char *question, char **answer); void geoip_free_all(void); +/** Directory requests that we are measuring can be either direct or + * tunneled. */ +typedef enum { + REQUEST_DIRECT = 0, + REQUEST_TUNNELED = 1, +} directory_request_type_t; + +/** Possible states for either direct or tunneled directory requests that + * are relevant for determining network status download times. */ +typedef enum { + /** Found that the client requests a network status; applies to both + * direct and tunneled requests; initial state of a request that we are + * measuring. */ + REQUEST_IS_FOR_NETWORK_STATUS = 0, + /** Finished writing a network status to the directory connection; + * applies to both direct and tunneled requests; completes a direct + * request. */ + FLUSHING_DIR_CONN_FINISHED = 1, + /** END cell sent to circuit that initiated a tunneled request. */ + END_CELL_SENT = 2, + /** Flushed last cell from queue of the circuit that initiated a + * tunneled request to the outbuf of the OR connection. */ + CIRC_QUEUE_FLUSHED = 3, + /** Flushed last byte from buffer of the OR connection belonging to the + * circuit that initiated a tunneled request; completes a tunneled + * request. */ + OR_CONN_BUFFER_FLUSHED = 4 +} directory_request_state_t; + +void geoip_start_dirreq(uint64_t request_id, size_t response_size, + geoip_client_action_t action, + directory_request_type_t type); +void geoip_change_dirreq_state(uint64_t request_id, + directory_request_type_t type, + directory_request_state_t new_state); + /********************************* hibernate.c **********************/ int accounting_parse_options(or_options_t *options, int validate_only); diff --git a/src/or/relay.c b/src/or/relay.c index eb18bbaade..580048be7b 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -532,6 +532,14 @@ relay_send_command_from_edge(uint16_t stream_id, circuit_t *circ, log_debug(LD_OR,"delivering %d cell %s.", relay_command, cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward"); +#ifdef ENABLE_GEOIP_STATS + /* If we are sending an END cell and this circuit is used for a tunneled + * directory request, advance its state. */ + if (relay_command == RELAY_COMMAND_END && circ->request_id) + geoip_change_dirreq_state(circ->request_id, REQUEST_TUNNELED, + END_CELL_SENT); +#endif + if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) { /* if we're using relaybandwidthrate, this conn wants priority */ circ->n_conn->client_used = approx_time(); @@ -1032,6 +1040,18 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, "Begin cell for known stream. Dropping."); return 0; } +#ifdef ENABLE_GEOIP_STATS + if (rh.command == RELAY_COMMAND_BEGIN_DIR) { + /* Assign this circuit and its app-ward OR connection a unique ID, + * so that we can measure download times. The local edge and dir + * connection will be assigned the same ID when they are created + * and linked. */ + static uint64_t next_id = 0; + circ->request_id = ++next_id; + TO_CONN(TO_OR_CIRCUIT(circ)->p_conn)->request_id = circ->request_id; + } +#endif + return connection_exit_begin_conn(cell, circ); case RELAY_COMMAND_DATA: ++stats_n_data_cells_received; @@ -1821,6 +1841,14 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, orcirc->processed_cells++; } #endif +#ifdef ENABLE_GEOIP_STATS + /* If we just flushed our queue and this circuit is used for a + * tunneled directory request, possibly advance its state. */ + if (queue->n == 0 && TO_CONN(conn)->request_id) + geoip_change_dirreq_state(TO_CONN(conn)->request_id, + REQUEST_TUNNELED, CIRC_QUEUE_FLUSHED); +#endif + connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn)); packed_cell_free(cell); From 4002980d1cbb95c70528a35aa2491fff4345999d Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Mon, 13 Jul 2009 01:30:30 +0200 Subject: [PATCH 2/6] Right, the u in uint stands for unsigned. --- src/or/geoip.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/or/geoip.c b/src/or/geoip.c index a7e46c0b8f..1430f34ae5 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -611,7 +611,7 @@ static void _dirreqdlmap_put(dirreqdlmap_entry_t *entry, directory_request_type_t type, uint64_t request_id) { - char key[3+20+1]; /* dir|tun + -9223372036854775808 + \0 */ + char key[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */ dirreqdlmap_entry_t *ent; if (!dirreqdlmap) dirreqdlmap = strmap_new(); @@ -633,7 +633,7 @@ _dirreqdlmap_put(dirreqdlmap_entry_t *entry, static dirreqdlmap_entry_t * _dirreqdlmap_get(directory_request_type_t type, uint64_t request_id) { - char key[3+20+1]; /* dir|tun + -9223372036854775808 + \0 */ + char key[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */ if (!dirreqdlmap) dirreqdlmap = strmap_new(); tor_snprintf(key, sizeof(key), "%s"U64_FORMAT, @@ -731,8 +731,8 @@ geoip_get_dirreqdl_history(geoip_client_action_t action, } } STRMAP_FOREACH_END; result = tor_malloc_zero(bufsize); - written = tor_snprintf(result, bufsize, "complete=%d,timeout=%d," - "running=%d", complete, timeouts, running); + written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u," + "running=%u", complete, timeouts, running); if (written < 0) return NULL; #define MIN_DIR_REQ_RESPONSES 16 @@ -744,8 +744,8 @@ geoip_get_dirreqdl_history(geoip_client_action_t action, }); median_uint32(dltimes, complete); /* sort */ written = tor_snprintf(result + written, bufsize - written, - ",min=%d,d1=%d,d2=%d,q1=%d,d3=%d,d4=%d,md=%d," - "d6=%d,d7=%d,q3=%d,d8=%d,d9=%d,max=%d", + ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u," + "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u", dltimes[0], dltimes[1*complete/10-1], dltimes[2*complete/10-1], From 85e7f67e1c773e81a0269858169a9992eeb53da6 Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Tue, 14 Jul 2009 02:04:20 +0200 Subject: [PATCH 3/6] Round up results to the next multiple of 4. --- src/or/geoip.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/or/geoip.c b/src/or/geoip.c index 1430f34ae5..819c9f0182 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -730,6 +730,13 @@ geoip_get_dirreqdl_history(geoip_client_action_t action, MAP_DEL_CURRENT(key); } } STRMAP_FOREACH_END; +#define DIR_REQ_GRANULARITY 4 + complete = round_uint32_to_next_multiple_of(complete, + DIR_REQ_GRANULARITY); + timeouts = round_uint32_to_next_multiple_of(timeouts, + DIR_REQ_GRANULARITY); + running = round_uint32_to_next_multiple_of(running, + DIR_REQ_GRANULARITY); result = tor_malloc_zero(bufsize); written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u," "running=%u", complete, timeouts, running); From 416940d93bac49f78b57a2cf561bd324d75b391f Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Tue, 14 Jul 2009 22:24:50 +0200 Subject: [PATCH 4/6] Some tweaks to directory request download times. - Use common prefixes DIRREQ_* and dirreq_*. - Replace enums in structs with bitfields. --- src/or/connection.c | 6 +- src/or/connection_edge.c | 4 +- src/or/directory.c | 17 +++--- src/or/geoip.c | 119 +++++++++++++++++++-------------------- src/or/or.h | 32 +++++------ src/or/relay.c | 17 +++--- 6 files changed, 97 insertions(+), 98 deletions(-) diff --git a/src/or/connection.c b/src/or/connection.c index 309421051a..242a32c2d9 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -2305,9 +2305,9 @@ connection_handle_write(connection_t *conn, int force) #ifdef ENABLE_GEOIP_STATS /* If we just flushed the last bytes, check if this tunneled dir * request is done. */ - if (buf_datalen(conn->outbuf) == 0 && conn->request_id) - geoip_change_dirreq_state(conn->request_id, REQUEST_TUNNELED, - OR_CONN_BUFFER_FLUSHED); + if (buf_datalen(conn->outbuf) == 0 && conn->dirreq_id) + geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED, + DIRREQ_OR_CONN_BUFFER_FLUSHED); #endif switch (result) { CASE_TOR_TLS_ERROR_ANY: diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c index f32563bcdb..f2b499fd6c 100644 --- a/src/or/connection_edge.c +++ b/src/or/connection_edge.c @@ -2554,7 +2554,7 @@ connection_exit_begin_conn(cell_t *cell, circuit_t *circ) #ifdef ENABLE_GEOIP_STATS /* Remember the tunneled request ID in the new edge connection, so that * we can measure download times. */ - TO_CONN(n_stream)->request_id = circ->request_id; + TO_CONN(n_stream)->dirreq_id = circ->dirreq_id; #endif n_stream->_base.purpose = EXIT_PURPOSE_CONNECT; @@ -2795,7 +2795,7 @@ connection_exit_connect_dir(edge_connection_t *exitconn) #ifdef ENABLE_GEOIP_STATS /* Note that the new dir conn belongs to the same tunneled request as * the edge conn, so that we can measure download times. */ - TO_CONN(dirconn)->request_id = TO_CONN(exitconn)->request_id; + TO_CONN(dirconn)->dirreq_id = TO_CONN(exitconn)->dirreq_id; #endif connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn)); diff --git a/src/or/directory.c b/src/or/directory.c index e5230c2cbd..c6faeae2e1 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -2570,12 +2570,12 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers, geoip_note_ns_response(act, GEOIP_SUCCESS); /* Note that a request for a network status has started, so that we * can measure the download time later on. */ - if (TO_CONN(conn)->request_id) - geoip_start_dirreq(TO_CONN(conn)->request_id, dlen, act, - REQUEST_TUNNELED); + if (TO_CONN(conn)->dirreq_id) + geoip_start_dirreq(TO_CONN(conn)->dirreq_id, dlen, act, + DIRREQ_TUNNELED); else geoip_start_dirreq(TO_CONN(conn)->global_identifier, dlen, act, - REQUEST_DIRECT); + DIRREQ_DIRECT); } } #endif @@ -3214,12 +3214,13 @@ connection_dir_finished_flushing(dir_connection_t *conn) /* Note that we have finished writing the directory response. For direct * connections this means we're done, for tunneled connections its only * an intermediate step. */ - if (TO_CONN(conn)->request_id) - geoip_change_dirreq_state(TO_CONN(conn)->request_id, REQUEST_TUNNELED, - FLUSHING_DIR_CONN_FINISHED); + if (TO_CONN(conn)->dirreq_id) + geoip_change_dirreq_state(TO_CONN(conn)->dirreq_id, DIRREQ_TUNNELED, + DIRREQ_FLUSHING_DIR_CONN_FINISHED); else geoip_change_dirreq_state(TO_CONN(conn)->global_identifier, - REQUEST_DIRECT, FLUSHING_DIR_CONN_FINISHED); + DIRREQ_DIRECT, + DIRREQ_FLUSHING_DIR_CONN_FINISHED); #endif switch (conn->_base.state) { case DIR_CONN_STATE_CLIENT_SENDING: diff --git a/src/or/geoip.c b/src/or/geoip.c index 819c9f0182..0ecc466088 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -580,107 +580,106 @@ _c_hist_compare(const void **_a, const void **_b) * response size, and completion time of a network status request. Used to * measure download times of requests to derive average client * bandwidths. */ -typedef struct dirreqdlmap_entry_t { - directory_request_state_t state; /**< State of this directory request. */ +typedef struct dirreq_map_entry_t { /** Unique identifier for this network status request; this is either the * conn->global_identifier of the dir conn (direct request) or a new * locally unique identifier of a circuit (tunneled request). This ID is * only unique among other direct or tunneled requests, respectively. */ - uint64_t request_id; - /** Is this a direct or a tunneled request? */ - directory_request_type_t type; - int completed:1; /**< Is this request complete? */ - geoip_client_action_t action; /**< Is this a v2 or v3 request? */ + uint64_t dirreq_id; + unsigned int state:3; /**< State of this directory request. */ + unsigned int type:1; /**< Is this a direct or a tunneled request? */ + unsigned int completed:1; /**< Is this request complete? */ + unsigned int action:2; /**< Is this a v2 or v3 request? */ /** When did we receive the request and started sending the response? */ struct timeval request_time; size_t response_size; /**< What is the size of the response in bytes? */ struct timeval completion_time; /**< When did the request succeed? */ -} dirreqdlmap_entry_t; +} dirreq_map_entry_t; /** Map of all directory requests asking for v2 or v3 network statuses in * the current geoip-stats interval. Keys are strings starting with either * "dir" for direct requests or "tun" for tunneled requests, followed by * a unique uint64_t identifier represented as decimal string. Values are - * of type *dirreqdlmap_entry_t. */ -static strmap_t *dirreqdlmap = NULL; + * of type *dirreq_map_entry_t. */ +static strmap_t *dirreq_map = NULL; /** Helper: Put entry into map of directory requests using - * tunneled and request_id as key parts. If there is + * tunneled and dirreq_id as key parts. If there is * already an entry for that key, print out a BUG warning and return. */ static void -_dirreqdlmap_put(dirreqdlmap_entry_t *entry, - directory_request_type_t type, uint64_t request_id) +_dirreq_map_put(dirreq_map_entry_t *entry, dirreq_type_t type, + uint64_t dirreq_id) { char key[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */ - dirreqdlmap_entry_t *ent; - if (!dirreqdlmap) - dirreqdlmap = strmap_new(); + dirreq_map_entry_t *ent; + if (!dirreq_map) + dirreq_map = strmap_new(); tor_snprintf(key, sizeof(key), "%s"U64_FORMAT, - type == REQUEST_TUNNELED ? "tun" : "dir", - U64_PRINTF_ARG(request_id)); - ent = strmap_get(dirreqdlmap, key); + type == DIRREQ_TUNNELED ? "tun" : "dir", + U64_PRINTF_ARG(dirreq_id)); + ent = strmap_get(dirreq_map, key); if (ent) { log_warn(LD_BUG, "Error when putting directory request into local " "map. There is already an entry for the same identifier."); return; } - strmap_set(dirreqdlmap, key, entry); + strmap_set(dirreq_map, key, entry); } /** Helper: Look up and return an entry in the map of directory requests - * using tunneled and request_id as key parts. If there + * using tunneled and dirreq_id as key parts. If there * is no such entry, return NULL. */ -static dirreqdlmap_entry_t * -_dirreqdlmap_get(directory_request_type_t type, uint64_t request_id) +static dirreq_map_entry_t * +_dirreq_map_get(dirreq_type_t type, uint64_t dirreq_id) { char key[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */ - if (!dirreqdlmap) - dirreqdlmap = strmap_new(); + if (!dirreq_map) + dirreq_map = strmap_new(); tor_snprintf(key, sizeof(key), "%s"U64_FORMAT, - type == REQUEST_TUNNELED ? "tun" : "dir", - U64_PRINTF_ARG(request_id)); - return strmap_get(dirreqdlmap, key); + type == DIRREQ_TUNNELED ? "tun" : "dir", + U64_PRINTF_ARG(dirreq_id)); + return strmap_get(dirreq_map, key); } /** Note that an either direct or tunneled (see type) directory - * request for a network status with unique ID request_id of size + * request for a network status with unique ID dirreq_id of size * response_size and action action (either v2 or v3) has * started. */ void -geoip_start_dirreq(uint64_t request_id, size_t response_size, - geoip_client_action_t action, - directory_request_type_t type) +geoip_start_dirreq(uint64_t dirreq_id, size_t response_size, + geoip_client_action_t action, dirreq_type_t type) { - dirreqdlmap_entry_t *ent = tor_malloc_zero(sizeof(dirreqdlmap_entry_t)); - ent->request_id = request_id; + dirreq_map_entry_t *ent = tor_malloc_zero(sizeof(dirreq_map_entry_t)); + ent->dirreq_id = dirreq_id; tor_gettimeofday(&ent->request_time); ent->response_size = response_size; ent->action = action; ent->type = type; - _dirreqdlmap_put(ent, type, request_id); + _dirreq_map_put(ent, type, dirreq_id); } /** Change the state of the either direct or tunneled (see type) - * directory request with request_id to new_state and + * directory request with dirreq_id to new_state and * possibly mark it as completed. If no entry can be found for the given * key parts (e.g., if this is a directory request that we are not * measuring, or one that was started in the previous measurement period), * or if the state cannot be advanced to new_state, do nothing. */ void -geoip_change_dirreq_state(uint64_t request_id, - directory_request_type_t type, - directory_request_state_t new_state) +geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type, + dirreq_state_t new_state) { - dirreqdlmap_entry_t *ent = _dirreqdlmap_get(type, request_id); + dirreq_map_entry_t *ent = _dirreq_map_get(type, dirreq_id); if (!ent) return; - if (new_state == REQUEST_IS_FOR_NETWORK_STATUS) + if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS) return; if (new_state - 1 != ent->state) return; ent->state = new_state; - if ((type == REQUEST_DIRECT && new_state == FLUSHING_DIR_CONN_FINISHED) || - (type == REQUEST_TUNNELED && new_state == OR_CONN_BUFFER_FLUSHED)) { + if ((type == DIRREQ_DIRECT && + new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED) || + (type == DIRREQ_TUNNELED && + new_state == DIRREQ_OR_CONN_BUFFER_FLUSHED)) { tor_gettimeofday(&ent->completion_time); ent->completed = 1; } @@ -693,22 +692,22 @@ geoip_change_dirreq_state(uint64_t request_id, * times by deciles and quartiles. Return NULL if we have not observed * requests for long enough. */ static char * -geoip_get_dirreqdl_history(geoip_client_action_t action, - directory_request_type_t type) +geoip_get_dirreq_history(geoip_client_action_t action, + dirreq_type_t type) { char *result = NULL; - smartlist_t *dirreqdl_times = NULL; + smartlist_t *dirreq_times = NULL; uint32_t complete = 0, timeouts = 0, running = 0; int i = 0, bufsize = 1024, written; struct timeval now; tor_gettimeofday(&now); - if (!dirreqdlmap) + if (!dirreq_map) return NULL; if (action != GEOIP_CLIENT_NETWORKSTATUS && action != GEOIP_CLIENT_NETWORKSTATUS_V2) return NULL; - dirreqdl_times = smartlist_create(); - STRMAP_FOREACH_MODIFY(dirreqdlmap, key, dirreqdlmap_entry_t *, ent) { + dirreq_times = smartlist_create(); + STRMAP_FOREACH_MODIFY(dirreq_map, key, dirreq_map_entry_t *, ent) { if (ent->action == action && type == ent->type) { if (ent->completed) { uint32_t *bytes_per_second = tor_malloc_zero(sizeof(uint32_t)); @@ -718,7 +717,7 @@ geoip_get_dirreqdl_history(geoip_client_action_t action, time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible * anyway by law of nature or something.. */ *bytes_per_second = 1000000 * ent->response_size / time_diff; - smartlist_add(dirreqdl_times, bytes_per_second); + smartlist_add(dirreq_times, bytes_per_second); complete++; } else { if (tv_udiff(&ent->request_time, &now) / 1000000 > DIRREQ_TIMEOUT) @@ -745,7 +744,7 @@ geoip_get_dirreqdl_history(geoip_client_action_t action, #define MIN_DIR_REQ_RESPONSES 16 if (complete >= MIN_DIR_REQ_RESPONSES) { uint32_t *dltimes = tor_malloc(sizeof(uint32_t) * complete); - SMARTLIST_FOREACH(dirreqdl_times, uint32_t *, dlt, { + SMARTLIST_FOREACH(dirreq_times, uint32_t *, dlt, { dltimes[i++] = *dlt; tor_free(dlt); }); @@ -770,7 +769,7 @@ geoip_get_dirreqdl_history(geoip_client_action_t action, } if (written < 0) result = NULL; - smartlist_free(dirreqdl_times); + smartlist_free(dirreq_times); return result; } #endif @@ -990,19 +989,19 @@ dump_geoip_stats(void) goto done; } - data_v2 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS_V2, - REQUEST_DIRECT); - data_v3 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS, - REQUEST_DIRECT); + data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2, + DIRREQ_DIRECT); + data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS, + DIRREQ_DIRECT); if (fprintf(out, "ns-direct-dl %s\nns-v2-direct-dl %s\n", data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0) goto done; tor_free(data_v2); tor_free(data_v3); - data_v2 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS_V2, - REQUEST_TUNNELED); - data_v3 = geoip_get_dirreqdl_history(GEOIP_CLIENT_NETWORKSTATUS, - REQUEST_TUNNELED); + data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2, + DIRREQ_TUNNELED); + data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS, + DIRREQ_TUNNELED); if (fprintf(out, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n", data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0) goto done; diff --git a/src/or/or.h b/src/or/or.h index 035d4edc0a..7b91ff7c33 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -972,7 +972,7 @@ typedef struct connection_t { #ifdef ENABLE_GEOIP_STATS /** Unique ID for measuring tunneled network status requests. */ - uint64_t request_id; + uint64_t dirreq_id; #endif } connection_t; @@ -1962,7 +1962,7 @@ typedef struct circuit_t { struct circuit_t *next; /**< Next circuit in linked list of all circuits. */ #ifdef ENABLE_GEOIP_STATS /** Unique ID for measuring tunneled network status requests. */ - uint64_t request_id; + uint64_t dirreq_id; #endif } circuit_t; @@ -3683,9 +3683,9 @@ void geoip_free_all(void); /** Directory requests that we are measuring can be either direct or * tunneled. */ typedef enum { - REQUEST_DIRECT = 0, - REQUEST_TUNNELED = 1, -} directory_request_type_t; + DIRREQ_DIRECT = 0, + DIRREQ_TUNNELED = 1, +} dirreq_type_t; /** Possible states for either direct or tunneled directory requests that * are relevant for determining network status download times. */ @@ -3693,28 +3693,26 @@ typedef enum { /** Found that the client requests a network status; applies to both * direct and tunneled requests; initial state of a request that we are * measuring. */ - REQUEST_IS_FOR_NETWORK_STATUS = 0, + DIRREQ_IS_FOR_NETWORK_STATUS = 0, /** Finished writing a network status to the directory connection; * applies to both direct and tunneled requests; completes a direct * request. */ - FLUSHING_DIR_CONN_FINISHED = 1, + DIRREQ_FLUSHING_DIR_CONN_FINISHED = 1, /** END cell sent to circuit that initiated a tunneled request. */ - END_CELL_SENT = 2, + DIRREQ_END_CELL_SENT = 2, /** Flushed last cell from queue of the circuit that initiated a * tunneled request to the outbuf of the OR connection. */ - CIRC_QUEUE_FLUSHED = 3, + DIRREQ_CIRC_QUEUE_FLUSHED = 3, /** Flushed last byte from buffer of the OR connection belonging to the * circuit that initiated a tunneled request; completes a tunneled * request. */ - OR_CONN_BUFFER_FLUSHED = 4 -} directory_request_state_t; + DIRREQ_OR_CONN_BUFFER_FLUSHED = 4 +} dirreq_state_t; -void geoip_start_dirreq(uint64_t request_id, size_t response_size, - geoip_client_action_t action, - directory_request_type_t type); -void geoip_change_dirreq_state(uint64_t request_id, - directory_request_type_t type, - directory_request_state_t new_state); +void geoip_start_dirreq(uint64_t dirreq_id, size_t response_size, + geoip_client_action_t action, dirreq_type_t type); +void geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type, + dirreq_state_t new_state); /********************************* hibernate.c **********************/ diff --git a/src/or/relay.c b/src/or/relay.c index 580048be7b..5654736f96 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -535,9 +535,9 @@ relay_send_command_from_edge(uint16_t stream_id, circuit_t *circ, #ifdef ENABLE_GEOIP_STATS /* If we are sending an END cell and this circuit is used for a tunneled * directory request, advance its state. */ - if (relay_command == RELAY_COMMAND_END && circ->request_id) - geoip_change_dirreq_state(circ->request_id, REQUEST_TUNNELED, - END_CELL_SENT); + if (relay_command == RELAY_COMMAND_END && circ->dirreq_id) + geoip_change_dirreq_state(circ->dirreq_id, DIRREQ_TUNNELED, + DIRREQ_END_CELL_SENT); #endif if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) { @@ -1047,8 +1047,8 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, * connection will be assigned the same ID when they are created * and linked. */ static uint64_t next_id = 0; - circ->request_id = ++next_id; - TO_CONN(TO_OR_CIRCUIT(circ)->p_conn)->request_id = circ->request_id; + circ->dirreq_id = ++next_id; + TO_CONN(TO_OR_CIRCUIT(circ)->p_conn)->dirreq_id = circ->dirreq_id; } #endif @@ -1844,9 +1844,10 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, #ifdef ENABLE_GEOIP_STATS /* If we just flushed our queue and this circuit is used for a * tunneled directory request, possibly advance its state. */ - if (queue->n == 0 && TO_CONN(conn)->request_id) - geoip_change_dirreq_state(TO_CONN(conn)->request_id, - REQUEST_TUNNELED, CIRC_QUEUE_FLUSHED); + if (queue->n == 0 && TO_CONN(conn)->dirreq_id) + geoip_change_dirreq_state(TO_CONN(conn)->dirreq_id, + DIRREQ_TUNNELED, + DIRREQ_CIRC_QUEUE_FLUSHED); #endif connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn)); From 8c496d1660c326c0bc2bd5c505255d5ec3b653ec Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Wed, 15 Jul 2009 16:32:40 +0200 Subject: [PATCH 5/6] Some tweaks to statistics. Changes to directory request statistics: - Rename GEOIP statistics to DIRREQ statistics, because they now include more than only GeoIP-based statistics, whereas other statistics are GeoIP-dependent, too. - Rename output file from geoip-stats to dirreq-stats. - Add new config option DirReqStatistics that is required to measure directory request statistics. - Clean up ChangeLog. Also ensure that entry guards statistics have access to a local GeoIP database. --- ChangeLog | 14 ++++++++------ configure.in | 8 ++++---- src/or/config.c | 34 ++++++++++++++++++++++++---------- src/or/connection.c | 2 +- src/or/connection_edge.c | 4 ++-- src/or/directory.c | 4 ++-- src/or/geoip.c | 40 ++++++++++++++++++++++++++-------------- src/or/or.h | 14 +++++++++----- src/or/relay.c | 6 +++--- src/or/router.c | 2 +- 10 files changed, 80 insertions(+), 48 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6b4f651828..e8b5000993 100644 --- a/ChangeLog +++ b/ChangeLog @@ -15,12 +15,14 @@ Changes in version 0.2.2.1-alpha - 2009-??-?? - The memarea code now uses a sentinel value at the end of each area to make sure nothing writes beyond the end of an area. This might help debug some conceivable causes of bug 930. - - Directories that are configured with the --enable-geoip-stats flag - now write their GeoIP stats to disk exactly every 24 hours. - Estimated shares of v2 and v3 requests are determined as averages, - not at the end of a measurement period. Also, unresolved requests - are listed with country code '??'. - Directories now also measure download times of network statuses. + - Directories that are configured with the --enable-dirreq-stats flag + and have "DirReqStatistics 1" set write directory request stats to + disk every 24 hours. As compared to the --enable-geoip-stats flag + in 0.2.1.x, there are a few improvements: 1) stats are written to + disk exactly every 24 hours; 2) estimated shares of v2 and v3 + requests are determined as mean values, not at the end of a + measurement period; 3) unresolved requests are listed with country + code '??'; 4) directories also measure download times. - Exit nodes can write statistics on the number of exit streams and transferred bytes per port to disk every 24 hours. To enable this, run configure with the --enable-exit-stats option, and set diff --git a/configure.in b/configure.in index 0f6ed80657..ab7d6480cf 100644 --- a/configure.in +++ b/configure.in @@ -92,11 +92,11 @@ if test "$enable_exit_stats" = "yes"; then AC_DEFINE(ENABLE_EXIT_STATS, 1, [Defined if we try to collect per-port statistics on exits]) fi -AC_ARG_ENABLE(geoip-stats, - AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics)) +AC_ARG_ENABLE(dirreq-stats, + AS_HELP_STRING(--enable-dirreq-stats, enable code for directories to collect per-country statistics)) -if test "$enable_geoip_stats" = "yes"; then - AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics]) +if test "$enable_dirreq_stats" = "yes"; then + AC_DEFINE(ENABLE_DIRREQ_STATS, 1, [Defined if we try to collect per-country statistics]) fi AC_ARG_ENABLE(buffer-stats, diff --git a/src/or/config.c b/src/or/config.c index 087a907e48..1811551556 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -187,12 +187,13 @@ static config_var_t _option_vars[] = { V(DirPort, UINT, "0"), V(DirPortFrontPage, FILENAME, NULL), OBSOLETE("DirPostPeriod"), -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS OBSOLETE("DirRecordUsageByCountry"), OBSOLETE("DirRecordUsageGranularity"), OBSOLETE("DirRecordUsageRetainIPs"), OBSOLETE("DirRecordUsageSaveInterval"), #endif + V(DirReqStatistics, BOOL, "0"), VAR("DirServer", LINELIST, DirServers, NULL), V(DNSPort, UINT, "0"), V(DNSListenAddress, LINELIST, NULL), @@ -1376,17 +1377,25 @@ options_act(or_options_t *old_options) geoip_load_file(actual_fname, options); tor_free(actual_fname); } -#ifdef ENABLE_GEOIP_STATS - /* Check if GeoIP database could be loaded. */ - if (!geoip_is_loaded()) { - log_warn(LD_CONFIG, "Configured to measure GeoIP statistics, but no " - "GeoIP database found!"); - return -1; + +#ifdef ENABLE_DIRREQ_STATS + if (options->DirReqStatistics) { + /* Check if GeoIP database could be loaded. */ + if (!geoip_is_loaded()) { + log_warn(LD_CONFIG, "Configured to measure directory request " + "statistics, but no GeoIP database found!"); + return -1; + } + log_notice(LD_CONFIG, "Configured to count directory requests by " + "country and write aggregate statistics to disk. Check the " + "dirreq-stats file in your data directory that will first " + "be written in 24 hours from now."); } - log_notice(LD_CONFIG, "Configured to measure usage by country and " - "write aggregate statistics to disk. Check the geoip-stats file " - "in your data directory once I've been running for 24 hours."); +#else + log_warn(LD_CONFIG, "DirReqStatistics enabled, but Tor was built " + "without support for directory request statistics."); #endif + #ifdef ENABLE_EXIT_STATS if (options->ExitPortStatistics) log_notice(LD_CONFIG, "Configured to measure exit port statistics. " @@ -1417,6 +1426,11 @@ options_act(or_options_t *old_options) log_warn(LD_CONFIG, "Bridges cannot be configured to measure " "additional GeoIP statistics as entry guards."); return -1; + } else if (!geoip_is_loaded()) { + /* Check if GeoIP database could be loaded. */ + log_warn(LD_CONFIG, "Configured to measure entry node statistics, " + "but no GeoIP database found!"); + return -1; } else log_notice(LD_CONFIG, "Configured to measure entry node " "statistics. Look for the entry-stats file that will " diff --git a/src/or/connection.c b/src/or/connection.c index 242a32c2d9..dc9c4eace2 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -2302,7 +2302,7 @@ connection_handle_write(connection_t *conn, int force) /* else open, or closing */ result = flush_buf_tls(or_conn->tls, conn->outbuf, max_to_write, &conn->outbuf_flushlen); -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /* If we just flushed the last bytes, check if this tunneled dir * request is done. */ if (buf_datalen(conn->outbuf) == 0 && conn->dirreq_id) diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c index f2b499fd6c..7a3d0a55be 100644 --- a/src/or/connection_edge.c +++ b/src/or/connection_edge.c @@ -2551,7 +2551,7 @@ connection_exit_begin_conn(cell_t *cell, circuit_t *circ) log_debug(LD_EXIT,"Creating new exit connection."); n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET); -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /* Remember the tunneled request ID in the new edge connection, so that * we can measure download times. */ TO_CONN(n_stream)->dirreq_id = circ->dirreq_id; @@ -2792,7 +2792,7 @@ connection_exit_connect_dir(edge_connection_t *exitconn) dirconn->_base.purpose = DIR_PURPOSE_SERVER; dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT; -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /* Note that the new dir conn belongs to the same tunneled request as * the edge conn, so that we can measure download times. */ TO_CONN(dirconn)->dirreq_id = TO_CONN(exitconn)->dirreq_id; diff --git a/src/or/directory.c b/src/or/directory.c index c6faeae2e1..976c08c19a 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -2562,7 +2562,7 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers, goto done; } -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS { struct in_addr in; if (tor_inet_aton((TO_CONN(conn))->address, &in)) { @@ -3210,7 +3210,7 @@ connection_dir_finished_flushing(dir_connection_t *conn) tor_assert(conn); tor_assert(conn->_base.type == CONN_TYPE_DIR); -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /* Note that we have finished writing the directory response. For direct * connections this means we're done, for tunneled connections its only * an intermediate step. */ diff --git a/src/or/geoip.c b/src/or/geoip.c index 0ecc466088..7aeec8f5f1 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -347,7 +347,7 @@ geoip_determine_shares(time_t now) last_time_determined_shares = now; } -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /** Calculate which fraction of v2 and v3 directory requests aimed at caches * have been sent to us since the last call of this function up to time * now. Set *v2_share_out and *v3_share_out to the @@ -390,10 +390,11 @@ geoip_note_client_seen(geoip_client_action_t action, if (client_history_starts > now) return; } else { -#ifndef ENABLE_GEOIP_STATS +#ifndef ENABLE_DIRREQ_STATS return; #else - if (options->BridgeRelay || options->BridgeAuthoritativeDir) + if (options->BridgeRelay || options->BridgeAuthoritativeDir || + !options->DirReqStatistics) return; #endif } @@ -494,7 +495,7 @@ geoip_remove_old_clients(time_t cutoff) client_history_starts = cutoff; } -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /** How many responses are we giving to clients requesting v2 network * statuses? */ static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM]; @@ -511,8 +512,10 @@ void geoip_note_ns_response(geoip_client_action_t action, geoip_ns_response_t response) { -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS static int arrays_initialized = 0; + if (!get_options()->DirReqStatistics) + return; if (!arrays_initialized) { memset(ns_v2_responses, 0, sizeof(ns_v2_responses)); memset(ns_v3_responses, 0, sizeof(ns_v3_responses)); @@ -649,7 +652,10 @@ void geoip_start_dirreq(uint64_t dirreq_id, size_t response_size, geoip_client_action_t action, dirreq_type_t type) { - dirreq_map_entry_t *ent = tor_malloc_zero(sizeof(dirreq_map_entry_t)); + dirreq_map_entry_t *ent; + if (!get_options()->DirReqStatistics) + return; + ent = tor_malloc_zero(sizeof(dirreq_map_entry_t)); ent->dirreq_id = dirreq_id; tor_gettimeofday(&ent->request_time); ent->response_size = response_size; @@ -668,7 +674,10 @@ void geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type, dirreq_state_t new_state) { - dirreq_map_entry_t *ent = _dirreq_map_get(type, dirreq_id); + dirreq_map_entry_t *ent; + if (!get_options()->DirReqStatistics) + return; + ent = _dirreq_map_get(type, dirreq_id); if (!ent) return; if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS) @@ -685,7 +694,7 @@ geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type, } } -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /** Return a newly allocated comma-separated string containing statistics * on network status downloads. The string contains the number of completed * requests, timeouts, and still running requests as well as the download @@ -788,7 +797,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action) { char *result = NULL; int min_observation_time = GEOIP_MIN_OBSERVATION_TIME; -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME; #endif if (!geoip_is_loaded()) @@ -803,7 +812,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action) unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries); unsigned total = 0; unsigned granularity = IP_GRANULARITY; -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS granularity = DIR_RECORD_USAGE_GRANULARITY; #endif HT_FOREACH(ent, clientmap, &client_history) { @@ -871,7 +880,7 @@ geoip_get_request_history(time_t now, geoip_client_action_t action) char *result; unsigned granularity = IP_GRANULARITY; int min_observation_time = GEOIP_MIN_OBSERVATION_TIME; -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS granularity = DIR_RECORD_USAGE_GRANULARITY; min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME; #endif @@ -916,14 +925,14 @@ geoip_get_request_history(time_t now, geoip_client_action_t action) return result; } -/** Store all our geoip statistics into $DATADIR/geoip-stats. */ +/** Store all our geoip statistics into $DATADIR/dirreq-stats. */ static void dump_geoip_stats(void) { -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS time_t now = time(NULL); time_t request_start; - char *filename = get_datadir_fname("geoip-stats"); + char *filename = get_datadir_fname("dirreq-stats"); char *data_v2 = NULL, *data_v3 = NULL; char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1]; open_file_t *open_file = NULL; @@ -931,6 +940,9 @@ dump_geoip_stats(void) FILE *out; int i; + if (!get_options()->DirReqStatistics) + goto done; + data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2); data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS); format_iso_time(since, geoip_get_history_start()); diff --git a/src/or/or.h b/src/or/or.h index 7b91ff7c33..445df4d78e 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -20,8 +20,8 @@ #ifndef INSTRUMENT_DOWNLOADS #define INSTRUMENT_DOWNLOADS 1 #endif -#ifndef ENABLE_GEOIP_STATS -#define ENABLE_GEOIP_STATS 1 +#ifndef ENABLE_DIRREQ_STATS +#define ENABLE_DIRREQ_STATS 1 #endif #ifndef ENABLE_BUFFER_STATS #define ENABLE_BUFFER_STATS 1 @@ -970,7 +970,7 @@ typedef struct connection_t { * to the evdns_server_port is uses to listen to and answer connections. */ struct evdns_server_port *dns_server_port; -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /** Unique ID for measuring tunneled network status requests. */ uint64_t dirreq_id; #endif @@ -1960,7 +1960,7 @@ typedef struct circuit_t { * linked to an OR connection. */ struct circuit_t *prev_active_on_n_conn; struct circuit_t *next; /**< Next circuit in linked list of all circuits. */ -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /** Unique ID for measuring tunneled network status requests. */ uint64_t dirreq_id; #endif @@ -2500,6 +2500,10 @@ typedef struct { * exit allows it, we use it. */ int AllowSingleHopCircuits; + /** If true, the user wants us to collect statistics on clients + * requesting network statuses from us as directory. */ + int DirReqStatistics; + /** If true, the user wants us to collect statistics on port usage. */ int ExitPortStatistics; @@ -2564,7 +2568,7 @@ typedef struct { int BridgeRecordUsageByCountry; #if 0 - /** If true, and Tor is built with GEOIP_STATS support, and we're a + /** If true, and Tor is built with DIRREQ_STATS support, and we're a * directory, record how many directory requests we get from each country. */ int DirRecordUsageByCountry; /** Round all GeoIP results to the next multiple of this value, to avoid diff --git a/src/or/relay.c b/src/or/relay.c index 5654736f96..098b95253e 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -532,7 +532,7 @@ relay_send_command_from_edge(uint16_t stream_id, circuit_t *circ, log_debug(LD_OR,"delivering %d cell %s.", relay_command, cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward"); -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /* If we are sending an END cell and this circuit is used for a tunneled * directory request, advance its state. */ if (relay_command == RELAY_COMMAND_END && circ->dirreq_id) @@ -1040,7 +1040,7 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, "Begin cell for known stream. Dropping."); return 0; } -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS if (rh.command == RELAY_COMMAND_BEGIN_DIR) { /* Assign this circuit and its app-ward OR connection a unique ID, * so that we can measure download times. The local edge and dir @@ -1841,7 +1841,7 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max, orcirc->processed_cells++; } #endif -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS /* If we just flushed our queue and this circuit is used for a * tunneled directory request, possibly advance its state. */ if (queue->n == 0 && TO_CONN(conn)->dirreq_id) diff --git a/src/or/router.c b/src/or/router.c index bdea4fa764..42a0d56471 100644 --- a/src/or/router.c +++ b/src/or/router.c @@ -1916,7 +1916,7 @@ extrainfo_get_client_geoip_summary(time_t now) { static time_t last_purged_at = 0; int geoip_purge_interval = 48*60*60; -#ifdef ENABLE_GEOIP_STATS +#ifdef ENABLE_DIRREQ_STATS geoip_purge_interval = DIR_RECORD_USAGE_RETAIN_IPS; #endif #ifdef ENABLE_ENTRY_STATS From cbe432739183ad9cb307f15362489bcc86b405d0 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Wed, 15 Jul 2009 12:24:02 -0400 Subject: [PATCH 6/6] Switch dirreq_map to use HT_ functions rather than strmap. --- src/or/geoip.c | 98 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 34 deletions(-) diff --git a/src/or/geoip.c b/src/or/geoip.c index 7aeec8f5f1..3336401220 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -584,6 +584,7 @@ _c_hist_compare(const void **_a, const void **_b) * measure download times of requests to derive average client * bandwidths. */ typedef struct dirreq_map_entry_t { + HT_ENTRY(dirreq_map_entry_t) node; /** Unique identifier for this network status request; this is either the * conn->global_identifier of the dir conn (direct request) or a new * locally unique identifier of a circuit (tunneled request). This ID is @@ -600,11 +601,30 @@ typedef struct dirreq_map_entry_t { } dirreq_map_entry_t; /** Map of all directory requests asking for v2 or v3 network statuses in - * the current geoip-stats interval. Keys are strings starting with either - * "dir" for direct requests or "tun" for tunneled requests, followed by - * a unique uint64_t identifier represented as decimal string. Values are + * the current geoip-stats interval. Values are * of type *dirreq_map_entry_t. */ -static strmap_t *dirreq_map = NULL; +static HT_HEAD(dirreqmap, dirreq_map_entry_t) dirreq_map = + HT_INITIALIZER(); + +static int +dirreq_map_ent_eq(const dirreq_map_entry_t *a, + const dirreq_map_entry_t *b) +{ + return a->dirreq_id == b->dirreq_id && a->type == b->type; +} + +static unsigned +dirreq_map_ent_hash(const dirreq_map_entry_t *entry) +{ + unsigned u = (unsigned) entry->dirreq_id; + u += entry->type << 20; + return u; +} + +HT_PROTOTYPE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash, + dirreq_map_ent_eq); +HT_GENERATE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash, + dirreq_map_ent_eq, 0.6, malloc, realloc, free); /** Helper: Put entry into map of directory requests using * tunneled and dirreq_id as key parts. If there is @@ -613,20 +633,18 @@ static void _dirreq_map_put(dirreq_map_entry_t *entry, dirreq_type_t type, uint64_t dirreq_id) { - char key[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */ - dirreq_map_entry_t *ent; - if (!dirreq_map) - dirreq_map = strmap_new(); - tor_snprintf(key, sizeof(key), "%s"U64_FORMAT, - type == DIRREQ_TUNNELED ? "tun" : "dir", - U64_PRINTF_ARG(dirreq_id)); - ent = strmap_get(dirreq_map, key); - if (ent) { + dirreq_map_entry_t *old_ent; + tor_assert(entry->type == type); + tor_assert(entry->dirreq_id == dirreq_id); + + /* XXXX022 once we're sure the bug case never happens, we can switch + * to HT_INSERT */ + old_ent = HT_REPLACE(dirreqmap, &dirreq_map, entry); + if (old_ent && old_ent != entry) { log_warn(LD_BUG, "Error when putting directory request into local " - "map. There is already an entry for the same identifier."); + "map. There was already an entry for the same identifier."); return; } - strmap_set(dirreq_map, key, entry); } /** Helper: Look up and return an entry in the map of directory requests @@ -635,13 +653,10 @@ _dirreq_map_put(dirreq_map_entry_t *entry, dirreq_type_t type, static dirreq_map_entry_t * _dirreq_map_get(dirreq_type_t type, uint64_t dirreq_id) { - char key[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */ - if (!dirreq_map) - dirreq_map = strmap_new(); - tor_snprintf(key, sizeof(key), "%s"U64_FORMAT, - type == DIRREQ_TUNNELED ? "tun" : "dir", - U64_PRINTF_ARG(dirreq_id)); - return strmap_get(dirreq_map, key); + dirreq_map_entry_t lookup; + lookup.type = type; + lookup.dirreq_id = dirreq_id; + return HT_FIND(dirreqmap, &dirreq_map, &lookup); } /** Note that an either direct or tunneled (see type) directory @@ -708,16 +723,20 @@ geoip_get_dirreq_history(geoip_client_action_t action, smartlist_t *dirreq_times = NULL; uint32_t complete = 0, timeouts = 0, running = 0; int i = 0, bufsize = 1024, written; + dirreq_map_entry_t **ptr, **next, *ent; struct timeval now; + tor_gettimeofday(&now); - if (!dirreq_map) - return NULL; if (action != GEOIP_CLIENT_NETWORKSTATUS && action != GEOIP_CLIENT_NETWORKSTATUS_V2) return NULL; dirreq_times = smartlist_create(); - STRMAP_FOREACH_MODIFY(dirreq_map, key, dirreq_map_entry_t *, ent) { - if (ent->action == action && type == ent->type) { + for (ptr = HT_START(dirreqmap, &dirreq_map); ptr; ptr = next) { + ent = *ptr; + if (ent->action != action || ent->type != type) { + next = HT_NEXT(dirreqmap, &dirreq_map, ptr); + continue; + } else { if (ent->completed) { uint32_t *bytes_per_second = tor_malloc_zero(sizeof(uint32_t)); uint32_t time_diff = (uint32_t) tv_udiff(&ent->request_time, @@ -734,10 +753,10 @@ geoip_get_dirreq_history(geoip_client_action_t action, else running++; } + next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr); tor_free(ent); - MAP_DEL_CURRENT(key); } - } STRMAP_FOREACH_END; + } #define DIR_REQ_GRANULARITY 4 complete = round_uint32_to_next_multiple_of(complete, DIR_REQ_GRANULARITY); @@ -1106,13 +1125,24 @@ clear_geoip_db(void) void geoip_free_all(void) { - clientmap_entry_t **ent, **next, *this; - for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) { - this = *ent; - next = HT_NEXT_RMV(clientmap, &client_history, ent); - tor_free(this); + { + clientmap_entry_t **ent, **next, *this; + for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) { + this = *ent; + next = HT_NEXT_RMV(clientmap, &client_history, ent); + tor_free(this); + } + HT_CLEAR(clientmap, &client_history); + } + { + dirreq_map_entry_t **ent, **next, *this; + for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) { + this = *ent; + next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent); + tor_free(this); + } + HT_CLEAR(dirreqmap, &dirreq_map); } - HT_CLEAR(clientmap, &client_history); clear_geoip_db(); }