Fix bridge statistics.

Fix statistics on client numbers by country as seen by bridges that were
broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour intervals
instead of variable 12-to-48-hour intervals.
This commit is contained in:
Karsten Loesing 2009-12-12 08:32:46 +01:00
parent 62c2a5a883
commit e1e5c1b3ab
8 changed files with 251 additions and 65 deletions

View File

@ -25,6 +25,9 @@ Changes in version 0.2.2.7-alpha - 2009-??-??
- Do not segfault when writing buffer stats when we haven't observed
a single circuit to report about. Found by Fabian Lanze. Bugfix on
0.2.2.1-alpha.
- Fix statistics on client numbers by country as seen by bridges that
were broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour
intervals instead of variable 12-to-48-hour intervals.
Changes in version 0.2.2.6-alpha - 2009-11-19

View File

@ -1637,11 +1637,11 @@
TimeStarted is a quoted string indicating when the reported summary
counts from (in GMT).
The CountrySummary keyword has as its argument a comma-separated
set of "countrycode=count" pairs. For example,
650-CLIENTS_SEEN TimeStarted="Thu Dec 25 23:50:43 EST 2008"
650 CountrySummary=us=16,de=8,uk=8
[XXX Matt Edman informs me that the time format above is wrong. -RD]
The CountrySummary keyword has as its argument a comma-separated,
possibly empty set of "countrycode=count" pairs. For example (without
linebreak),
650-CLIENTS_SEEN TimeStarted="2008-12-25 23:50:43"
CountrySummary=us=16,de=8,uk=8
4.1.15. New consensus networkstatus has arrived.

View File

@ -627,8 +627,8 @@
As documented in 2.1 above. See migration notes in section 2.2.1.
"geoip-start" YYYY-MM-DD HH:MM:SS NL
"geoip-client-origins" CC=N,CC=N,... NL
("geoip-start" YYYY-MM-DD HH:MM:SS NL)
("geoip-client-origins" CC=N,CC=N,... NL)
Only generated by bridge routers (see blocking.pdf), and only
when they have been configured with a geoip database.
@ -641,6 +641,33 @@
"geoip-start" is the time at which we began collecting geoip
statistics.
"geoip-start" and "geoip-client-origins" have been replaced by
"bridge-stats-end" and "bridge-stats-ips" in 0.2.2.4-alpha. The
reason is that the measurement interval with "geoip-stats" as
determined by subtracting "geoip-start" from "published" could
have had a variable length, whereas the measurement interval in
0.2.2.4-alpha and later is set to be exactly 24 hours long. In
order to clearly distinguish the new measurement intervals from
the old ones, the new keywords have been introduced.
"bridge-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
[At most once.]
YYYY-MM-DD HH:MM:SS defines the end of the included measurement
interval of length NSEC seconds (86400 seconds by default).
A "bridge-stats-end" line, as well as any other "bridge-*" line,
is only added when the relay has been running as a bridge for at
least 24 hours.
"bridge-ips" CC=N,CC=N,... NL
[At most once.]
List of mappings from two-letter country codes to the number of
unique IP addresses that have connected from that country to the
bridge and which are no known relays, rounded up to the nearest
multiple of 8.
"dirreq-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
[At most once.]

View File

@ -1755,21 +1755,10 @@ getinfo_helper_events(control_connection_t *control_conn,
"information", question);
}
} else if (!strcmp(question, "status/clients-seen")) {
char geoip_start[ISO_TIME_LEN+1];
size_t answer_len;
char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL));
if (!geoip_summary)
char *bridge_stats = geoip_get_bridge_stats_controller(time(NULL));
if (!bridge_stats)
return -1;
answer_len = strlen("TimeStarted=\"\" CountrySummary=") +
ISO_TIME_LEN + strlen(geoip_summary) + 1;
*answer = tor_malloc(answer_len);
format_iso_time(geoip_start, geoip_get_history_start());
tor_snprintf(*answer, answer_len,
"TimeStarted=\"%s\" CountrySummary=%s",
geoip_start, geoip_summary);
tor_free(geoip_summary);
*answer = bridge_stats;
} else {
return 0;
}
@ -3846,10 +3835,9 @@ control_event_bootstrap_problem(const char *warn, int reason)
* from recently. Send a copy to the controller in case it wants to
* display it for the user. */
void
control_event_clients_seen(const char *timestarted, const char *countries)
control_event_clients_seen(const char *controller_str)
{
send_control_event(EVENT_CLIENTS_SEEN, 0,
"650 CLIENTS_SEEN TimeStarted=\"%s\" CountrySummary=%s\r\n",
timestarted, countries);
"650 CLIENTS_SEEN %s\r\n", controller_str);
}

View File

@ -1081,6 +1081,173 @@ geoip_dirreq_stats_write(time_t now)
tor_free(data_v3);
}
/** Start time of bridge stats. */
static time_t start_of_bridge_stats_interval;
/** Initialize bridge stats. */
void
geoip_bridge_stats_init(time_t now)
{
start_of_bridge_stats_interval = now;
}
/** Parse the bridge statistics as they are written to extra-info
* descriptors for being returned to controller clients. Return the
* controller string if successful, or NULL otherwise. */
static char *
parse_bridge_stats_controller(const char *stats_str, time_t now)
{
char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1],
*controller_str, *eos;
const char *stats_end_line = "bridge-stats-end",
*ips_line = "bridge-ips", *tmp;
time_t stats_end_time;
size_t controller_len;
int seconds;
tor_assert(stats_str);
/* Parse timestamp and number of seconds from
"bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */
tmp = strstr(stats_str, stats_end_line);
if (!tmp || strlen(tmp) < strlen(stats_end_line) + 1 + ISO_TIME_LEN + 6)
return NULL;
strlcpy(stats_end_str, tmp + strlen(stats_end_line) + 1,
sizeof(stats_end_str));
if (parse_iso_time(stats_end_str, &stats_end_time) < 0)
return NULL;
if (stats_end_time < now - (25*60*60) ||
stats_end_time > now + (1*60*60))
return NULL;
seconds = (int)strtol(tmp + strlen(stats_end_line) + 1 +
ISO_TIME_LEN + 2, &eos, 10);
if (!eos || seconds < 23*60*60)
return NULL;
format_iso_time(stats_start_str, stats_end_time - seconds);
/* Parse: "bridge-ips CC=N,CC=N,..." */
tmp = strstr(tmp, ips_line);
if (!tmp || strlen(tmp) < strlen(ips_line))
return NULL;
if (strlen(tmp) > strlen(ips_line) + 2)
tmp += strlen(ips_line) + 1;
else
tmp = "";
controller_len = strlen("TimeStarted=\"\" CountrySummary=") +
ISO_TIME_LEN + strlen(tmp) + 1;
controller_str = tor_malloc(controller_len);
if (tor_snprintf(controller_str, controller_len,
"TimeStarted=\"%s\" CountrySummary=%s",
stats_start_str, tmp) < 0) {
tor_free(controller_str);
return NULL;
}
return controller_str;
}
/** Most recent bridge statistics formatted to be written to extra-info
* descriptors. */
static char *bridge_stats_extrainfo = NULL;
/** Most recent bridge statistics formatted to be returned to controller
* clients. */
static char *bridge_stats_controller = NULL;
/** Write bridge statistics to $DATADIR/stats/bridge-stats and return
* when we should next try to write statistics. */
time_t
geoip_bridge_stats_write(time_t now)
{
char *statsdir = NULL, *filename = NULL, *data = NULL,
written[ISO_TIME_LEN+1], *out = NULL, *controller_str;
size_t len;
/* If we changed from relay to bridge recently, adapt starting time
* of current measurements. */
if (start_of_bridge_stats_interval < client_history_starts)
start_of_bridge_stats_interval = client_history_starts;
/* Check if 24 hours have passed since starting measurements. */
if (now < start_of_bridge_stats_interval +
DIR_ENTRY_RECORD_USAGE_RETAIN_IPS)
return start_of_bridge_stats_interval +
DIR_ENTRY_RECORD_USAGE_RETAIN_IPS;
/* Discard all items in the client history that are too old. */
geoip_remove_old_clients(start_of_bridge_stats_interval);
statsdir = get_datadir_fname("stats");
if (check_private_dir(statsdir, CPD_CREATE) < 0)
goto done;
filename = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats");
data = geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT);
format_iso_time(written, now);
len = strlen("bridge-stats-end (999999 s)\nbridge-ips \n") +
ISO_TIME_LEN + (data ? strlen(data) : 0) + 1;
out = tor_malloc(len);
if (tor_snprintf(out, len, "bridge-stats-end %s (%u s)\nbridge-ips %s\n",
written, (unsigned) (now - start_of_bridge_stats_interval),
data ? data : "") < 0)
goto done;
write_str_to_file(filename, out, 0);
controller_str = parse_bridge_stats_controller(out, now);
if (!controller_str)
goto done;
start_of_bridge_stats_interval = now;
tor_free(bridge_stats_extrainfo);
tor_free(bridge_stats_controller);
bridge_stats_extrainfo = out;
out = NULL;
bridge_stats_controller = controller_str;
control_event_clients_seen(controller_str);
done:
tor_free(filename);
tor_free(statsdir);
tor_free(data);
tor_free(out);
return start_of_bridge_stats_interval +
DIR_ENTRY_RECORD_USAGE_RETAIN_IPS;
}
/** Try to load the most recent bridge statistics from disk, unless we
* have finished a measurement interval lately. */
static void
load_bridge_stats(time_t now)
{
char *fname, *contents, *controller_str;
if (bridge_stats_extrainfo)
return;
fname = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats");
contents = read_file_to_str(fname, 0, NULL);
if (contents) {
controller_str = parse_bridge_stats_controller(contents, now);
if (controller_str) {
bridge_stats_extrainfo = contents;
bridge_stats_controller = controller_str;
} else {
tor_free(contents);
}
}
tor_free(fname);
}
/** Return most recent bridge statistics for inclusion in extra-info
* descriptors, or NULL if we don't have recent bridge statistics. */
char *
geoip_get_bridge_stats_extrainfo(time_t now)
{
load_bridge_stats(now);
return bridge_stats_extrainfo;
}
/** Return most recent bridge statistics to be returned to controller
* clients, or NULL if we don't have recent bridge statistics. */
char *
geoip_get_bridge_stats_controller(time_t now)
{
load_bridge_stats(now);
return bridge_stats_controller;
}
/** Start time of entry stats. */
static time_t start_of_entry_stats_interval;

View File

@ -832,6 +832,8 @@ run_scheduled_events(time_t now)
static time_t time_to_recheck_bandwidth = 0;
static time_t time_to_check_for_expired_networkstatus = 0;
static time_t time_to_write_stats_files = 0;
static time_t time_to_write_bridge_stats = 0;
static int should_init_bridge_stats = 1;
static time_t time_to_retry_dns_init = 0;
or_options_t *options = get_options();
int i;
@ -966,7 +968,8 @@ run_scheduled_events(time_t now)
if (options->CellStatistics || options->DirReqStatistics ||
options->EntryStatistics || options->ExitPortStatistics) {
if (!time_to_write_stats_files) {
/* Initialize stats. */
/* Initialize stats. We're doing this here and not in options_act,
* so that we know exactly when the 24 hours interval ends. */
if (options->CellStatistics)
rep_hist_buffer_stats_init(now);
if (options->DirReqStatistics)
@ -998,6 +1001,28 @@ run_scheduled_events(time_t now)
}
}
/* 1h. Check whether we should write bridge statistics to disk.
*/
if (should_record_bridge_info(options)) {
if (time_to_write_bridge_stats < now) {
if (should_init_bridge_stats) {
/* (Re-)initialize bridge statistics. */
geoip_bridge_stats_init(now);
time_to_write_bridge_stats = now + WRITE_STATS_INTERVAL;
should_init_bridge_stats = 0;
} else {
/* Possibly write bridge statistics to disk and ask when to write
* them next time. */
time_to_write_bridge_stats = geoip_bridge_stats_write(
time_to_write_bridge_stats);
}
}
} else if (!should_init_bridge_stats) {
/* Bridge mode was turned off. Ensure that stats are re-initialized
* next time bridge mode is turned on. */
should_init_bridge_stats = 1;
}
/* Remove old information from rephist and the rend cache. */
if (time_to_clean_caches < now) {
rep_history_clean(now - options->RephistTrackTime);

View File

@ -3630,8 +3630,7 @@ typedef enum {
void control_event_bootstrap(bootstrap_status_t status, int progress);
void control_event_bootstrap_problem(const char *warn, int reason);
void control_event_clients_seen(const char *timestarted,
const char *countries);
void control_event_clients_seen(const char *controller_str);
#ifdef CONTROL_PRIVATE
/* Used only by control.c and test.c */
@ -4089,6 +4088,10 @@ void geoip_dirreq_stats_init(time_t now);
void geoip_dirreq_stats_write(time_t now);
void geoip_entry_stats_init(time_t now);
void geoip_entry_stats_write(time_t now);
void geoip_bridge_stats_init(time_t now);
time_t geoip_bridge_stats_write(time_t now);
char *geoip_get_bridge_stats_extrainfo(time_t);
char *geoip_get_bridge_stats_controller(time_t);
/********************************* hibernate.c **********************/
@ -4749,7 +4752,6 @@ int router_dump_router_to_string(char *s, size_t maxlen, routerinfo_t *router,
crypto_pk_env_t *ident_key);
int extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
crypto_pk_env_t *ident_key);
char *extrainfo_get_client_geoip_summary(time_t);
int is_legal_nickname(const char *s);
int is_legal_nickname_or_hexdigest(const char *s);
int is_legal_hexdigest(const char *s);

View File

@ -1882,6 +1882,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
int result;
size_t len;
static int write_stats_to_extrainfo = 1;
time_t now = time(NULL);
base16_encode(identity, sizeof(identity),
extrainfo->cache_info.identity_digest, DIGEST_LEN);
@ -1896,7 +1897,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
if (options->ExtraInfoStatistics && write_stats_to_extrainfo) {
char *contents = NULL;
time_t now = time(NULL);
log_info(LD_GENERAL, "Adding stats to extra-info descriptor.");
if (options->DirReqStatistics &&
load_stats_file("stats"PATH_SEPARATOR"dirreq-stats",
@ -1953,18 +1953,16 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
return -1;
if (should_record_bridge_info(options)) {
char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL));
if (geoip_summary) {
char geoip_start[ISO_TIME_LEN+1];
format_iso_time(geoip_start, geoip_get_history_start());
result = tor_snprintf(s+strlen(s), maxlen-strlen(s),
"geoip-start-time %s\n"
"geoip-client-origins %s\n",
geoip_start, geoip_summary);
control_event_clients_seen(geoip_start, geoip_summary);
tor_free(geoip_summary);
if (result<0)
return -1;
char *bridge_stats = geoip_get_bridge_stats_extrainfo(now);
if (bridge_stats) {
size_t pos = strlen(s);
if (strlcpy(s + pos, bridge_stats, maxlen - strlen(s)) !=
strlen(bridge_stats)) {
log_warn(LD_DIR, "Could not write bridge-stats to extra-info "
"descriptor.");
s[pos] = '\0';
}
tor_free(bridge_stats);
}
}
@ -2013,30 +2011,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
return (int)strlen(s)+1;
}
/** Wrapper function for geoip_get_client_history(). It first discards
* any items in the client history that are too old -- it dumps anything
* more than 48 hours old, but it only considers whether to dump at most
* once per 48 hours, so we aren't too precise to an observer (see also
* r14780).
*/
char *
extrainfo_get_client_geoip_summary(time_t now)
{
static time_t last_purged_at = 0;
int geoip_purge_interval =
(get_options()->DirReqStatistics || get_options()->EntryStatistics) ?
DIR_ENTRY_RECORD_USAGE_RETAIN_IPS : 48*60*60;
if (now > last_purged_at+geoip_purge_interval) {
/* (Note that this also discards items in the client history with
* action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter
* because bridge and directory stats are independent. Keep in mind
* for future extensions, though.) */
geoip_remove_old_clients(now-geoip_purge_interval);
last_purged_at = now;
}
return geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT);
}
/** Return true iff <b>s</b> is a legally valid server nickname. */
int
is_legal_nickname(const char *s)