From e1e5c1b3aba96afbd6fa3b5ece262720dd45a80e Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Sat, 12 Dec 2009 08:32:46 +0100 Subject: [PATCH] Fix bridge statistics. Fix statistics on client numbers by country as seen by bridges that were broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour intervals instead of variable 12-to-48-hour intervals. --- ChangeLog | 3 + doc/spec/control-spec.txt | 10 +-- doc/spec/dir-spec.txt | 31 ++++++- src/or/control.c | 22 ++--- src/or/geoip.c | 167 ++++++++++++++++++++++++++++++++++++++ src/or/main.c | 27 +++++- src/or/or.h | 8 +- src/or/router.c | 48 +++-------- 8 files changed, 251 insertions(+), 65 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2cb2383935..56c8958c9a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -25,6 +25,9 @@ Changes in version 0.2.2.7-alpha - 2009-??-?? - Do not segfault when writing buffer stats when we haven't observed a single circuit to report about. Found by Fabian Lanze. Bugfix on 0.2.2.1-alpha. + - Fix statistics on client numbers by country as seen by bridges that + were broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour + intervals instead of variable 12-to-48-hour intervals. Changes in version 0.2.2.6-alpha - 2009-11-19 diff --git a/doc/spec/control-spec.txt b/doc/spec/control-spec.txt index 33d4658476..73f0b020a9 100644 --- a/doc/spec/control-spec.txt +++ b/doc/spec/control-spec.txt @@ -1637,11 +1637,11 @@ TimeStarted is a quoted string indicating when the reported summary counts from (in GMT). - The CountrySummary keyword has as its argument a comma-separated - set of "countrycode=count" pairs. For example, - 650-CLIENTS_SEEN TimeStarted="Thu Dec 25 23:50:43 EST 2008" - 650 CountrySummary=us=16,de=8,uk=8 -[XXX Matt Edman informs me that the time format above is wrong. -RD] + The CountrySummary keyword has as its argument a comma-separated, + possibly empty set of "countrycode=count" pairs. For example (without + linebreak), + 650-CLIENTS_SEEN TimeStarted="2008-12-25 23:50:43" + CountrySummary=us=16,de=8,uk=8 4.1.15. New consensus networkstatus has arrived. diff --git a/doc/spec/dir-spec.txt b/doc/spec/dir-spec.txt index 5eedbd5348..0bfc758ee1 100644 --- a/doc/spec/dir-spec.txt +++ b/doc/spec/dir-spec.txt @@ -627,8 +627,8 @@ As documented in 2.1 above. See migration notes in section 2.2.1. - "geoip-start" YYYY-MM-DD HH:MM:SS NL - "geoip-client-origins" CC=N,CC=N,... NL + ("geoip-start" YYYY-MM-DD HH:MM:SS NL) + ("geoip-client-origins" CC=N,CC=N,... NL) Only generated by bridge routers (see blocking.pdf), and only when they have been configured with a geoip database. @@ -641,6 +641,33 @@ "geoip-start" is the time at which we began collecting geoip statistics. + "geoip-start" and "geoip-client-origins" have been replaced by + "bridge-stats-end" and "bridge-stats-ips" in 0.2.2.4-alpha. The + reason is that the measurement interval with "geoip-stats" as + determined by subtracting "geoip-start" from "published" could + have had a variable length, whereas the measurement interval in + 0.2.2.4-alpha and later is set to be exactly 24 hours long. In + order to clearly distinguish the new measurement intervals from + the old ones, the new keywords have been introduced. + + "bridge-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL + [At most once.] + + YYYY-MM-DD HH:MM:SS defines the end of the included measurement + interval of length NSEC seconds (86400 seconds by default). + + A "bridge-stats-end" line, as well as any other "bridge-*" line, + is only added when the relay has been running as a bridge for at + least 24 hours. + + "bridge-ips" CC=N,CC=N,... NL + [At most once.] + + List of mappings from two-letter country codes to the number of + unique IP addresses that have connected from that country to the + bridge and which are no known relays, rounded up to the nearest + multiple of 8. + "dirreq-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL [At most once.] diff --git a/src/or/control.c b/src/or/control.c index c3567bdaf2..d3ed8587bd 100644 --- a/src/or/control.c +++ b/src/or/control.c @@ -1755,21 +1755,10 @@ getinfo_helper_events(control_connection_t *control_conn, "information", question); } } else if (!strcmp(question, "status/clients-seen")) { - char geoip_start[ISO_TIME_LEN+1]; - size_t answer_len; - char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL)); - - if (!geoip_summary) + char *bridge_stats = geoip_get_bridge_stats_controller(time(NULL)); + if (!bridge_stats) return -1; - - answer_len = strlen("TimeStarted=\"\" CountrySummary=") + - ISO_TIME_LEN + strlen(geoip_summary) + 1; - *answer = tor_malloc(answer_len); - format_iso_time(geoip_start, geoip_get_history_start()); - tor_snprintf(*answer, answer_len, - "TimeStarted=\"%s\" CountrySummary=%s", - geoip_start, geoip_summary); - tor_free(geoip_summary); + *answer = bridge_stats; } else { return 0; } @@ -3846,10 +3835,9 @@ control_event_bootstrap_problem(const char *warn, int reason) * from recently. Send a copy to the controller in case it wants to * display it for the user. */ void -control_event_clients_seen(const char *timestarted, const char *countries) +control_event_clients_seen(const char *controller_str) { send_control_event(EVENT_CLIENTS_SEEN, 0, - "650 CLIENTS_SEEN TimeStarted=\"%s\" CountrySummary=%s\r\n", - timestarted, countries); + "650 CLIENTS_SEEN %s\r\n", controller_str); } diff --git a/src/or/geoip.c b/src/or/geoip.c index a57e1fb725..a43daaf067 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -1081,6 +1081,173 @@ geoip_dirreq_stats_write(time_t now) tor_free(data_v3); } +/** Start time of bridge stats. */ +static time_t start_of_bridge_stats_interval; + +/** Initialize bridge stats. */ +void +geoip_bridge_stats_init(time_t now) +{ + start_of_bridge_stats_interval = now; +} + +/** Parse the bridge statistics as they are written to extra-info + * descriptors for being returned to controller clients. Return the + * controller string if successful, or NULL otherwise. */ +static char * +parse_bridge_stats_controller(const char *stats_str, time_t now) +{ + char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1], + *controller_str, *eos; + const char *stats_end_line = "bridge-stats-end", + *ips_line = "bridge-ips", *tmp; + time_t stats_end_time; + size_t controller_len; + int seconds; + tor_assert(stats_str); + + /* Parse timestamp and number of seconds from + "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */ + tmp = strstr(stats_str, stats_end_line); + if (!tmp || strlen(tmp) < strlen(stats_end_line) + 1 + ISO_TIME_LEN + 6) + return NULL; + strlcpy(stats_end_str, tmp + strlen(stats_end_line) + 1, + sizeof(stats_end_str)); + if (parse_iso_time(stats_end_str, &stats_end_time) < 0) + return NULL; + if (stats_end_time < now - (25*60*60) || + stats_end_time > now + (1*60*60)) + return NULL; + seconds = (int)strtol(tmp + strlen(stats_end_line) + 1 + + ISO_TIME_LEN + 2, &eos, 10); + if (!eos || seconds < 23*60*60) + return NULL; + format_iso_time(stats_start_str, stats_end_time - seconds); + + /* Parse: "bridge-ips CC=N,CC=N,..." */ + tmp = strstr(tmp, ips_line); + if (!tmp || strlen(tmp) < strlen(ips_line)) + return NULL; + if (strlen(tmp) > strlen(ips_line) + 2) + tmp += strlen(ips_line) + 1; + else + tmp = ""; + controller_len = strlen("TimeStarted=\"\" CountrySummary=") + + ISO_TIME_LEN + strlen(tmp) + 1; + controller_str = tor_malloc(controller_len); + if (tor_snprintf(controller_str, controller_len, + "TimeStarted=\"%s\" CountrySummary=%s", + stats_start_str, tmp) < 0) { + tor_free(controller_str); + return NULL; + } + return controller_str; +} + +/** Most recent bridge statistics formatted to be written to extra-info + * descriptors. */ +static char *bridge_stats_extrainfo = NULL; + +/** Most recent bridge statistics formatted to be returned to controller + * clients. */ +static char *bridge_stats_controller = NULL; + +/** Write bridge statistics to $DATADIR/stats/bridge-stats and return + * when we should next try to write statistics. */ +time_t +geoip_bridge_stats_write(time_t now) +{ + char *statsdir = NULL, *filename = NULL, *data = NULL, + written[ISO_TIME_LEN+1], *out = NULL, *controller_str; + size_t len; + + /* If we changed from relay to bridge recently, adapt starting time + * of current measurements. */ + if (start_of_bridge_stats_interval < client_history_starts) + start_of_bridge_stats_interval = client_history_starts; + + /* Check if 24 hours have passed since starting measurements. */ + if (now < start_of_bridge_stats_interval + + DIR_ENTRY_RECORD_USAGE_RETAIN_IPS) + return start_of_bridge_stats_interval + + DIR_ENTRY_RECORD_USAGE_RETAIN_IPS; + + /* Discard all items in the client history that are too old. */ + geoip_remove_old_clients(start_of_bridge_stats_interval); + + statsdir = get_datadir_fname("stats"); + if (check_private_dir(statsdir, CPD_CREATE) < 0) + goto done; + filename = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats"); + data = geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT); + format_iso_time(written, now); + len = strlen("bridge-stats-end (999999 s)\nbridge-ips \n") + + ISO_TIME_LEN + (data ? strlen(data) : 0) + 1; + out = tor_malloc(len); + if (tor_snprintf(out, len, "bridge-stats-end %s (%u s)\nbridge-ips %s\n", + written, (unsigned) (now - start_of_bridge_stats_interval), + data ? data : "") < 0) + goto done; + write_str_to_file(filename, out, 0); + controller_str = parse_bridge_stats_controller(out, now); + if (!controller_str) + goto done; + start_of_bridge_stats_interval = now; + tor_free(bridge_stats_extrainfo); + tor_free(bridge_stats_controller); + bridge_stats_extrainfo = out; + out = NULL; + bridge_stats_controller = controller_str; + control_event_clients_seen(controller_str); + done: + tor_free(filename); + tor_free(statsdir); + tor_free(data); + tor_free(out); + return start_of_bridge_stats_interval + + DIR_ENTRY_RECORD_USAGE_RETAIN_IPS; +} + +/** Try to load the most recent bridge statistics from disk, unless we + * have finished a measurement interval lately. */ +static void +load_bridge_stats(time_t now) +{ + char *fname, *contents, *controller_str; + if (bridge_stats_extrainfo) + return; + fname = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats"); + contents = read_file_to_str(fname, 0, NULL); + if (contents) { + controller_str = parse_bridge_stats_controller(contents, now); + if (controller_str) { + bridge_stats_extrainfo = contents; + bridge_stats_controller = controller_str; + } else { + tor_free(contents); + } + } + tor_free(fname); +} + +/** Return most recent bridge statistics for inclusion in extra-info + * descriptors, or NULL if we don't have recent bridge statistics. */ +char * +geoip_get_bridge_stats_extrainfo(time_t now) +{ + load_bridge_stats(now); + return bridge_stats_extrainfo; +} + +/** Return most recent bridge statistics to be returned to controller + * clients, or NULL if we don't have recent bridge statistics. */ +char * +geoip_get_bridge_stats_controller(time_t now) +{ + load_bridge_stats(now); + return bridge_stats_controller; +} + /** Start time of entry stats. */ static time_t start_of_entry_stats_interval; diff --git a/src/or/main.c b/src/or/main.c index 4dc182efe4..101c08429d 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -832,6 +832,8 @@ run_scheduled_events(time_t now) static time_t time_to_recheck_bandwidth = 0; static time_t time_to_check_for_expired_networkstatus = 0; static time_t time_to_write_stats_files = 0; + static time_t time_to_write_bridge_stats = 0; + static int should_init_bridge_stats = 1; static time_t time_to_retry_dns_init = 0; or_options_t *options = get_options(); int i; @@ -966,7 +968,8 @@ run_scheduled_events(time_t now) if (options->CellStatistics || options->DirReqStatistics || options->EntryStatistics || options->ExitPortStatistics) { if (!time_to_write_stats_files) { - /* Initialize stats. */ + /* Initialize stats. We're doing this here and not in options_act, + * so that we know exactly when the 24 hours interval ends. */ if (options->CellStatistics) rep_hist_buffer_stats_init(now); if (options->DirReqStatistics) @@ -998,6 +1001,28 @@ run_scheduled_events(time_t now) } } + /* 1h. Check whether we should write bridge statistics to disk. + */ + if (should_record_bridge_info(options)) { + if (time_to_write_bridge_stats < now) { + if (should_init_bridge_stats) { + /* (Re-)initialize bridge statistics. */ + geoip_bridge_stats_init(now); + time_to_write_bridge_stats = now + WRITE_STATS_INTERVAL; + should_init_bridge_stats = 0; + } else { + /* Possibly write bridge statistics to disk and ask when to write + * them next time. */ + time_to_write_bridge_stats = geoip_bridge_stats_write( + time_to_write_bridge_stats); + } + } + } else if (!should_init_bridge_stats) { + /* Bridge mode was turned off. Ensure that stats are re-initialized + * next time bridge mode is turned on. */ + should_init_bridge_stats = 1; + } + /* Remove old information from rephist and the rend cache. */ if (time_to_clean_caches < now) { rep_history_clean(now - options->RephistTrackTime); diff --git a/src/or/or.h b/src/or/or.h index 736e66a52d..85f549fc49 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3630,8 +3630,7 @@ typedef enum { void control_event_bootstrap(bootstrap_status_t status, int progress); void control_event_bootstrap_problem(const char *warn, int reason); -void control_event_clients_seen(const char *timestarted, - const char *countries); +void control_event_clients_seen(const char *controller_str); #ifdef CONTROL_PRIVATE /* Used only by control.c and test.c */ @@ -4089,6 +4088,10 @@ void geoip_dirreq_stats_init(time_t now); void geoip_dirreq_stats_write(time_t now); void geoip_entry_stats_init(time_t now); void geoip_entry_stats_write(time_t now); +void geoip_bridge_stats_init(time_t now); +time_t geoip_bridge_stats_write(time_t now); +char *geoip_get_bridge_stats_extrainfo(time_t); +char *geoip_get_bridge_stats_controller(time_t); /********************************* hibernate.c **********************/ @@ -4749,7 +4752,6 @@ int router_dump_router_to_string(char *s, size_t maxlen, routerinfo_t *router, crypto_pk_env_t *ident_key); int extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, crypto_pk_env_t *ident_key); -char *extrainfo_get_client_geoip_summary(time_t); int is_legal_nickname(const char *s); int is_legal_nickname_or_hexdigest(const char *s); int is_legal_hexdigest(const char *s); diff --git a/src/or/router.c b/src/or/router.c index 8b6d1d9bbd..7362e60f66 100644 --- a/src/or/router.c +++ b/src/or/router.c @@ -1882,6 +1882,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, int result; size_t len; static int write_stats_to_extrainfo = 1; + time_t now = time(NULL); base16_encode(identity, sizeof(identity), extrainfo->cache_info.identity_digest, DIGEST_LEN); @@ -1896,7 +1897,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, if (options->ExtraInfoStatistics && write_stats_to_extrainfo) { char *contents = NULL; - time_t now = time(NULL); log_info(LD_GENERAL, "Adding stats to extra-info descriptor."); if (options->DirReqStatistics && load_stats_file("stats"PATH_SEPARATOR"dirreq-stats", @@ -1953,18 +1953,16 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, return -1; if (should_record_bridge_info(options)) { - char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL)); - if (geoip_summary) { - char geoip_start[ISO_TIME_LEN+1]; - format_iso_time(geoip_start, geoip_get_history_start()); - result = tor_snprintf(s+strlen(s), maxlen-strlen(s), - "geoip-start-time %s\n" - "geoip-client-origins %s\n", - geoip_start, geoip_summary); - control_event_clients_seen(geoip_start, geoip_summary); - tor_free(geoip_summary); - if (result<0) - return -1; + char *bridge_stats = geoip_get_bridge_stats_extrainfo(now); + if (bridge_stats) { + size_t pos = strlen(s); + if (strlcpy(s + pos, bridge_stats, maxlen - strlen(s)) != + strlen(bridge_stats)) { + log_warn(LD_DIR, "Could not write bridge-stats to extra-info " + "descriptor."); + s[pos] = '\0'; + } + tor_free(bridge_stats); } } @@ -2013,30 +2011,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, return (int)strlen(s)+1; } -/** Wrapper function for geoip_get_client_history(). It first discards - * any items in the client history that are too old -- it dumps anything - * more than 48 hours old, but it only considers whether to dump at most - * once per 48 hours, so we aren't too precise to an observer (see also - * r14780). - */ -char * -extrainfo_get_client_geoip_summary(time_t now) -{ - static time_t last_purged_at = 0; - int geoip_purge_interval = - (get_options()->DirReqStatistics || get_options()->EntryStatistics) ? - DIR_ENTRY_RECORD_USAGE_RETAIN_IPS : 48*60*60; - if (now > last_purged_at+geoip_purge_interval) { - /* (Note that this also discards items in the client history with - * action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter - * because bridge and directory stats are independent. Keep in mind - * for future extensions, though.) */ - geoip_remove_old_clients(now-geoip_purge_interval); - last_purged_at = now; - } - return geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT); -} - /** Return true iff s is a legally valid server nickname. */ int is_legal_nickname(const char *s)