From c934fced31a2dd6d53b129b6270109bc48c1ebb0 Mon Sep 17 00:00:00 2001 From: David Goulet Date: Tue, 15 Dec 2020 11:41:42 -0500 Subject: [PATCH 1/3] relay: Report the entire content of a stats file It turns out that 9 years ago, we stopped appending data into stats file and rather overwrite everytime we have new stats (see commit a6a127c833eace1100aca7ab8ad118862bb8a8b9) The load_stats_file() function was still thinking that we could have the same line many times in the file which turns out to be false since 9 years ago. However, that did not cause problem until IPv6 connection stats came along which introduced a new line in conn-stats: "ipv6-conn-bi-direct ...". Before, that file contained a single line starting with the tag "conn-bi-direct". That very tag appears also in the IPv6 tag (see above) so the load_stats_file() function would consider that the IPv6 line as the last tag to be appeneded to the file and fail to report the line above (for IPv4). It would actually truncate the IPv6 line and report it (removing the "ipv6-" part). In other words, "conn-bi-direct" was not reported and instead "ipv6-conn-bi-direct" was used without the "ipv6-" part. This commit refactors the entire function so that now it looks for a "timestamp tag" to validate and then if everything is fine, returns the entire content of the file. The refactor simplifies the function, adds logging in case of failures and modernize it in terms of coding standard. Unit tests are also added that makes sure the loaded content matches the entire file if timestamp validation passes. Fixes #40226 Signed-off-by: David Goulet --- changes/ticket40226 | 5 ++ src/feature/relay/router.c | 106 ++++++++++++++++++++++--------------- src/feature/relay/router.h | 2 + src/test/test_stats.c | 65 +++++++++++++++++++++++ 4 files changed, 135 insertions(+), 43 deletions(-) create mode 100644 changes/ticket40226 diff --git a/changes/ticket40226 b/changes/ticket40226 new file mode 100644 index 0000000000..4775438f63 --- /dev/null +++ b/changes/ticket40226 @@ -0,0 +1,5 @@ + o Minor bugfixes (relay, statistics): + - The connection statistics were wrongly exported in the extrainfo document + due to a problem in the file loading function which would wrongly truncate + the file reporting the wrong information. It is now fixed. Fixes bug + 40226; bugfix on 0.4.5.1-alpha. diff --git a/src/feature/relay/router.c b/src/feature/relay/router.c index 3aa9561f4f..abe23f9aae 100644 --- a/src/feature/relay/router.c +++ b/src/feature/relay/router.c @@ -3139,57 +3139,77 @@ router_dump_exit_policy_to_string(const routerinfo_t *router, include_ipv6); } -/** Load the contents of filename, find the last line starting with - * end_line, ensure that its timestamp is not more than 25 hours in - * the past or more than 1 hour in the future with respect to now, - * and write the file contents starting with that line to *out. - * Return 1 for success, 0 if the file does not exist or is empty, or -1 - * if the file does not contain a line matching these criteria or other - * failure. */ -static int -load_stats_file(const char *filename, const char *end_line, time_t now, +/** Load the contents of filename, find a line starting with + * timestamp tag ts_tag, ensure that its timestamp is not more than 25 + * hours in the past or more than 1 hour in the future with respect to + * now, and write the entire file contents into out. + * + * The timestamp expected should be an ISO-formatted UTC time value which is + * parsed using our parse_iso_time() function. + * + * In case more than one tag are found in the file, the very first one is + * used. + * + * Return 1 for success, 0 if the file does not exist or is empty, or -1 if + * the file does not contain a line with the timestamp tag. */ +STATIC int +load_stats_file(const char *filename, const char *ts_tag, time_t now, char **out) { int r = -1; char *fname = get_datadir_fname(filename); - char *contents, *start = NULL, *tmp, timestr[ISO_TIME_LEN+1]; + char *contents = NULL, timestr[ISO_TIME_LEN+1]; time_t written; + switch (file_status(fname)) { - case FN_FILE: - /* X022 Find an alternative to reading the whole file to memory. */ - if ((contents = read_file_to_str(fname, 0, NULL))) { - tmp = strstr(contents, end_line); - /* Find last block starting with end_line */ - while (tmp) { - start = tmp; - tmp = strstr(tmp + 1, end_line); - } - if (!start) - goto notfound; - if (strlen(start) < strlen(end_line) + 1 + sizeof(timestr)) - goto notfound; - strlcpy(timestr, start + 1 + strlen(end_line), sizeof(timestr)); - if (parse_iso_time(timestr, &written) < 0) - goto notfound; - if (written < now - (25*60*60) || written > now + (1*60*60)) - goto notfound; - *out = tor_strdup(start); - r = 1; - } - notfound: - tor_free(contents); - break; - /* treat empty stats files as if the file doesn't exist */ - case FN_NOENT: - case FN_EMPTY: - r = 0; - break; - case FN_ERROR: - case FN_DIR: - default: - break; + case FN_FILE: + contents = read_file_to_str(fname, 0, NULL); + if (contents == NULL) { + log_debug(LD_BUG, "Unable to read content of %s", filename); + goto end; + } + /* Find the timestamp tag to validate that the file is not too old or if + * exists. */ + const char *ts_tok = find_str_at_start_of_line(contents, ts_tag); + if (!ts_tok) { + log_warn(LD_BUG, "Token %s not found in file %s", ts_tag, filename); + goto end; + } + /* Do we have enough for parsing a timestamp? */ + if (strlen(contents) < strlen(ts_tag) + 1 + sizeof(timestr)) { + log_warn(LD_BUG, "Token %s malformed in file %s", ts_tag, filename); + goto end; + } + /* Parse timestamp in order to validate it is not too old. */ + strlcpy(timestr, contents + strlen(ts_tag) + 1, sizeof(timestr)); + if (parse_iso_time(timestr, &written) < 0) { + log_warn(LD_BUG, "Token %s has a malformed timestamp in file %s", + ts_tag, filename); + goto end; + } + if (written < now - (25*60*60) || written > now + (1*60*60)) { + /* This can happen normally so don't log. */ + goto end; + } + /* Success. Put in the entire content. */ + *out = contents; + contents = NULL; /* Must not free it. */ + r = 1; + break; + /* treat empty stats files as if the file doesn't exist */ + case FN_NOENT: + case FN_EMPTY: + r = 0; + break; + case FN_ERROR: + case FN_DIR: + default: + break; } + + end: tor_free(fname); + tor_free(contents); return r; } diff --git a/src/feature/relay/router.h b/src/feature/relay/router.h index f71ada8eb7..2648bb5112 100644 --- a/src/feature/relay/router.h +++ b/src/feature/relay/router.h @@ -129,6 +129,8 @@ void router_free_all(void); STATIC void get_platform_str(char *platform, size_t len); STATIC int router_write_fingerprint(int hashed, int ed25519_identity); STATIC smartlist_t *get_my_declared_family(const or_options_t *options); +STATIC int load_stats_file(const char *filename, const char *ts_tag, + time_t now, char **out); #ifdef TOR_UNIT_TESTS extern time_t desc_clean_since; diff --git a/src/test/test_stats.c b/src/test/test_stats.c index b6849b0b6d..154acd4f9f 100644 --- a/src/test/test_stats.c +++ b/src/test/test_stats.c @@ -31,6 +31,7 @@ #define MAINLOOP_PRIVATE #define STATEFILE_PRIVATE #define BWHIST_PRIVATE +#define ROUTER_PRIVATE #include "core/or/or.h" #include "lib/err/backtrace.h" @@ -45,6 +46,7 @@ #include "app/config/statefile.h" #include "feature/stats/bwhist.h" #include "feature/stats/bw_array_st.h" +#include "feature/relay/router.h" /** Run unit tests for some stats code. */ static void @@ -493,6 +495,68 @@ test_get_bandwidth_lines(void *arg) bwhist_free_all(); } +static void +test_load_stats_file(void *arg) +{ + int ret; + char *content = NULL, *read_file_content = NULL; + const char *fname = NULL; + + (void) arg; + + /* Load conn-stats. */ + fname = get_fname("conn-stats"); + tt_assert(fname); + read_file_content = tor_strdup( + "conn-bi-direct 2020-12-13 15:48:53 (86400 s) 12,34,56,78\n" + "ipv6-conn-bi-direct 2020-12-14 15:48:53 (86400 s) 21,43,65,87\n"); + write_str_to_file(fname, read_file_content, 0); + ret = load_stats_file("conn-stats", "conn-bi-direct", 1607874000, &content); + tt_int_op(ret, OP_EQ, 1); + tt_str_op(read_file_content, OP_EQ, content); + + /* Load hidserv-stats. */ + fname = get_fname("hidserv-stats"); + tt_assert(fname); + tor_free(read_file_content); + read_file_content = tor_strdup( + "hidserv-stats-end 2020-12-13 15:48:53 (86400 s)\n" + "hidserv-rend-relayed-cells 48754891 delta_f=2048 epsilon=0.30 " + "bin_size=1024\n" + "hidserv-dir-onions-seen 53 delta_f=8 epsilon=0.30 bin_size=8\n"); + write_str_to_file(fname, read_file_content, 0); + tor_free(content); + ret = load_stats_file("hidserv-stats", "hidserv-stats-end", 1607874000, + &content); + tt_int_op(ret, OP_EQ, 1); + tt_str_op(read_file_content, OP_EQ, content); + + /* Load dirreq-stats. */ + fname = get_fname("dirreq-stats"); + tt_assert(fname); + tor_free(read_file_content); + read_file_content = tor_strdup( + "dirreq-stats-end 2020-12-13 15:48:53 (86400 s)\n" + "dirreq-v3-ips ru=1728,us=1144,de=696,ir=432,gb=328,fr=304,in=296,ua=232\n" + "dirreq-v3-reqs ru=3616,us=3576,de=1896,fr=800,gb=632,ir=616\n" + "dirreq-v3-resp ok=18472,not-enough-sigs=0,unavailable=0,not-found=0," + "not-modified=3136,busy=0\n" + "dirreq-v3-direct-dl complete=0,timeout=0,running=0\n" + "dirreq-v3-tunneled-dl complete=18124,timeout=348,running=4,min=257," + "d1=133653,d2=221050,q1=261242,d3=300622,d4=399758,md=539051,d6=721322," + "d7=959866,q3=1103363,d8=1302035,d9=2046125,max=113404000\n"); + write_str_to_file(fname, read_file_content, 0); + tor_free(content); + ret = load_stats_file("dirreq-stats", "dirreq-stats-end", 1607874000, + &content); + tt_int_op(ret, OP_EQ, 1); + tt_str_op(read_file_content, OP_EQ, content); + + done: + tor_free(read_file_content); + tor_free(content); +} + #define ENT(name) \ { #name, test_ ## name , 0, NULL, NULL } #define FORK(name) \ @@ -506,6 +570,7 @@ struct testcase_t stats_tests[] = { FORK(add_obs), FORK(fill_bandwidth_history), FORK(get_bandwidth_lines), + FORK(load_stats_file), END_OF_TESTCASES }; From 5dd6304f36a9cb9b7f778abe5b84cf6a4e9890a6 Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Wed, 16 Dec 2020 09:58:19 +0100 Subject: [PATCH 2/3] Fix timestamp parser in new load_stats_file. The previous parser only considered stats files _starting_ with the timestamp tag, not stats files having the timestamp tag in a later position. While this applies to all current stats files, a future stats file might look differently. Better to fix the function now than be surprised in another 9 years from now. This commit also adds a test case for such future stats, and it fixes stats file paths in newly added unit tests. --- src/feature/relay/router.c | 4 ++-- src/test/test_stats.c | 26 +++++++++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/feature/relay/router.c b/src/feature/relay/router.c index abe23f9aae..e018561556 100644 --- a/src/feature/relay/router.c +++ b/src/feature/relay/router.c @@ -3176,12 +3176,12 @@ load_stats_file(const char *filename, const char *ts_tag, time_t now, goto end; } /* Do we have enough for parsing a timestamp? */ - if (strlen(contents) < strlen(ts_tag) + 1 + sizeof(timestr)) { + if (strlen(ts_tok) < strlen(ts_tag) + 1 + sizeof(timestr)) { log_warn(LD_BUG, "Token %s malformed in file %s", ts_tag, filename); goto end; } /* Parse timestamp in order to validate it is not too old. */ - strlcpy(timestr, contents + strlen(ts_tag) + 1, sizeof(timestr)); + strlcpy(timestr, ts_tok + strlen(ts_tag) + 1, sizeof(timestr)); if (parse_iso_time(timestr, &written) < 0) { log_warn(LD_BUG, "Token %s has a malformed timestamp in file %s", ts_tag, filename); diff --git a/src/test/test_stats.c b/src/test/test_stats.c index 154acd4f9f..7262477d77 100644 --- a/src/test/test_stats.c +++ b/src/test/test_stats.c @@ -499,13 +499,12 @@ static void test_load_stats_file(void *arg) { int ret; - char *content = NULL, *read_file_content = NULL; - const char *fname = NULL; + char *content = NULL, *read_file_content = NULL, *fname = NULL; (void) arg; /* Load conn-stats. */ - fname = get_fname("conn-stats"); + fname = get_datadir_fname("conn-stats"); tt_assert(fname); read_file_content = tor_strdup( "conn-bi-direct 2020-12-13 15:48:53 (86400 s) 12,34,56,78\n" @@ -516,7 +515,7 @@ test_load_stats_file(void *arg) tt_str_op(read_file_content, OP_EQ, content); /* Load hidserv-stats. */ - fname = get_fname("hidserv-stats"); + fname = get_datadir_fname("hidserv-stats"); tt_assert(fname); tor_free(read_file_content); read_file_content = tor_strdup( @@ -525,6 +524,7 @@ test_load_stats_file(void *arg) "bin_size=1024\n" "hidserv-dir-onions-seen 53 delta_f=8 epsilon=0.30 bin_size=8\n"); write_str_to_file(fname, read_file_content, 0); + tor_free(fname); tor_free(content); ret = load_stats_file("hidserv-stats", "hidserv-stats-end", 1607874000, &content); @@ -532,7 +532,7 @@ test_load_stats_file(void *arg) tt_str_op(read_file_content, OP_EQ, content); /* Load dirreq-stats. */ - fname = get_fname("dirreq-stats"); + fname = get_datadir_fname("dirreq-stats"); tt_assert(fname); tor_free(read_file_content); read_file_content = tor_strdup( @@ -546,13 +546,29 @@ test_load_stats_file(void *arg) "d1=133653,d2=221050,q1=261242,d3=300622,d4=399758,md=539051,d6=721322," "d7=959866,q3=1103363,d8=1302035,d9=2046125,max=113404000\n"); write_str_to_file(fname, read_file_content, 0); + tor_free(fname); tor_free(content); ret = load_stats_file("dirreq-stats", "dirreq-stats-end", 1607874000, &content); tt_int_op(ret, OP_EQ, 1); tt_str_op(read_file_content, OP_EQ, content); + /* Attempt to load future-stats file not starting with timestamp tag. */ + fname = get_datadir_fname("future-stats"); + tt_assert(fname); + tor_free(read_file_content); + read_file_content = tor_strdup( + "future-stuff-at-file-start\n" + "future-stats 2020-12-13 15:48:53 (86400 s)\n"); + write_str_to_file(fname, read_file_content, 0); + tor_free(fname); + tor_free(content); + ret = load_stats_file("future-stats", "future-stats", 1607874000, &content); + tt_int_op(ret, OP_EQ, 1); + tt_str_op(read_file_content, OP_EQ, content); + done: + tor_free(fname); tor_free(read_file_content); tor_free(content); } From f4cbcde2da39dec4be26aa1d2ffcb4d26603e1fa Mon Sep 17 00:00:00 2001 From: David Goulet Date: Mon, 21 Dec 2020 13:10:58 -0500 Subject: [PATCH 3/3] test: Fix memleak in test/load_stats_file Signed-off-by: David Goulet --- src/test/test_stats.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/test_stats.c b/src/test/test_stats.c index 7262477d77..d45afc7b15 100644 --- a/src/test/test_stats.c +++ b/src/test/test_stats.c @@ -515,6 +515,7 @@ test_load_stats_file(void *arg) tt_str_op(read_file_content, OP_EQ, content); /* Load hidserv-stats. */ + tor_free(fname); fname = get_datadir_fname("hidserv-stats"); tt_assert(fname); tor_free(read_file_content); @@ -524,7 +525,6 @@ test_load_stats_file(void *arg) "bin_size=1024\n" "hidserv-dir-onions-seen 53 delta_f=8 epsilon=0.30 bin_size=8\n"); write_str_to_file(fname, read_file_content, 0); - tor_free(fname); tor_free(content); ret = load_stats_file("hidserv-stats", "hidserv-stats-end", 1607874000, &content); @@ -532,6 +532,7 @@ test_load_stats_file(void *arg) tt_str_op(read_file_content, OP_EQ, content); /* Load dirreq-stats. */ + tor_free(fname); fname = get_datadir_fname("dirreq-stats"); tt_assert(fname); tor_free(read_file_content); @@ -546,7 +547,6 @@ test_load_stats_file(void *arg) "d1=133653,d2=221050,q1=261242,d3=300622,d4=399758,md=539051,d6=721322," "d7=959866,q3=1103363,d8=1302035,d9=2046125,max=113404000\n"); write_str_to_file(fname, read_file_content, 0); - tor_free(fname); tor_free(content); ret = load_stats_file("dirreq-stats", "dirreq-stats-end", 1607874000, &content); @@ -554,6 +554,7 @@ test_load_stats_file(void *arg) tt_str_op(read_file_content, OP_EQ, content); /* Attempt to load future-stats file not starting with timestamp tag. */ + tor_free(fname); fname = get_datadir_fname("future-stats"); tt_assert(fname); tor_free(read_file_content); @@ -561,7 +562,6 @@ test_load_stats_file(void *arg) "future-stuff-at-file-start\n" "future-stats 2020-12-13 15:48:53 (86400 s)\n"); write_str_to_file(fname, read_file_content, 0); - tor_free(fname); tor_free(content); ret = load_stats_file("future-stats", "future-stats", 1607874000, &content); tt_int_op(ret, OP_EQ, 1);