diff --git a/changes/task2196 b/changes/task2196 new file mode 100644 index 0000000000..e629fccac2 --- /dev/null +++ b/changes/task2196 @@ -0,0 +1,5 @@ + o Minor features: + - Report only the top 10 ports in exit-port stats in order not to + exceed the maximum extra-info descriptor length of 50 KB. Implements + task 2196. + diff --git a/src/or/rephist.c b/src/or/rephist.c index d41d715aee..175ea01756 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -1930,9 +1930,8 @@ dump_pk_ops(int severity) #define EXIT_STATS_ROUND_UP_STREAMS 4 /** Number of TCP ports */ #define EXIT_STATS_NUM_PORTS 65536 -/** Reciprocal of threshold (= 0.01%) of total bytes that a port needs to - * see in order to be included in exit stats. */ -#define EXIT_STATS_THRESHOLD_RECIPROCAL 10000 +/** Top n ports that will be included in exit stats. */ +#define EXIT_STATS_TOP_N_PORTS 10 /* The following data structures are arrays and no fancy smartlists or maps, * so that all write operations can be done in constant time. This comes at @@ -1982,15 +1981,23 @@ rep_hist_exit_stats_term(void) tor_free(exit_streams); } +/** Helper for qsort: compare two ints. */ +static int +_compare_int(const void *x, const void *y) { + return (*(int*)x - *(int*)y); +} + /** Return a newly allocated string containing the exit port statistics * until now, or NULL if we're not collecting exit stats. */ char * rep_hist_format_exit_stats(time_t now) { - int i; - uint64_t total_bytes = 0, threshold_bytes, other_read = 0, - other_written = 0; - uint32_t other_streams = 0; + int i, j, top_elements = 0, cur_min_idx = 0, cur_port; + uint64_t top_bytes[EXIT_STATS_TOP_N_PORTS]; + int top_ports[EXIT_STATS_TOP_N_PORTS]; + uint64_t cur_bytes = 0, other_read = 0, other_written = 0, + total_read = 0, total_written = 0; + uint32_t total_streams = 0, other_streams = 0; char *buf; smartlist_t *written_strings, *read_strings, *streams_strings; char *written_string, *read_string, *streams_string; @@ -2000,52 +2007,101 @@ rep_hist_format_exit_stats(time_t now) if (!start_of_exit_stats_interval) return NULL; /* Not initialized. */ - /* Count total number of bytes, so that we can attribute observations - * below or equal to a threshold of 1 / EXIT_STATS_THRESHOLD_RECIPROCAL - * of all bytes to a special port 'other'. */ + /* Go through all ports to find the n ports that saw most written and + * read bytes. + * + * Invariant: at the end of the loop for iteration i, + * total_read is the sum of all exit_bytes_read[0..i] + * total_written is the sum of all exit_bytes_written[0..i] + * total_stream is the sum of all exit_streams[0..i] + * + * top_elements = MAX(EXIT_STATS_TOP_N_PORTS, + * #{j | 0 <= j <= i && volume(i) > 0}) + * + * For all 0 <= j < top_elements, + * top_bytes[j] > 0 + * 0 <= top_ports[j] <= 65535 + * top_bytes[j] = volume(top_ports[j]) + * + * There is no j in 0..i and k in 0..top_elements such that: + * volume(j) > top_bytes[k] AND j is not in top_ports[0..top_elements] + * + * There is no j!=cur_min_idx in 0..top_elements such that: + * top_bytes[j] < top_bytes[cur_min_idx] + * + * where volume(x) == exit_bytes_read[x]+exit_bytes_written[x] + * + * Worst case: O(EXIT_STATS_NUM_PORTS * EXIT_STATS_TOP_N_PORTS) + */ for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) { - total_bytes += exit_bytes_read[i]; - total_bytes += exit_bytes_written[i]; + total_read += exit_bytes_read[i]; + total_written += exit_bytes_written[i]; + total_streams += exit_streams[i]; + cur_bytes = exit_bytes_read[i] + exit_bytes_written[i]; + if (cur_bytes == 0) { + continue; + } + if (top_elements < EXIT_STATS_TOP_N_PORTS) { + top_bytes[top_elements] = cur_bytes; + top_ports[top_elements++] = i; + } else if (cur_bytes > top_bytes[cur_min_idx]) { + top_bytes[cur_min_idx] = cur_bytes; + top_ports[cur_min_idx] = i; + } else { + continue; + } + cur_min_idx = 0; + for (j = 1; j < top_elements; j++) { + if (top_bytes[j] < top_bytes[cur_min_idx]) { + cur_min_idx = j; + } + } } - threshold_bytes = total_bytes / EXIT_STATS_THRESHOLD_RECIPROCAL; - /* Add observations of all ports above the threshold to smartlists and - * join them to single strings. Also count bytes and streams of ports - * below or equal to the threshold. */ + /* Add observations of top ports to smartlists. */ written_strings = smartlist_create(); read_strings = smartlist_create(); streams_strings = smartlist_create(); - for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) { - if (exit_bytes_read[i] + exit_bytes_written[i] > threshold_bytes) { - if (exit_bytes_written[i] > 0) { - uint64_t num = round_uint64_to_next_multiple_of( - exit_bytes_written[i], EXIT_STATS_ROUND_UP_BYTES); - num /= 1024; - buf = NULL; - tor_asprintf(&buf, "%d="U64_FORMAT, i, U64_PRINTF_ARG(num)); - smartlist_add(written_strings, buf); - } - if (exit_bytes_read[i] > 0) { - uint64_t num = round_uint64_to_next_multiple_of( - exit_bytes_read[i], EXIT_STATS_ROUND_UP_BYTES); - num /= 1024; - buf = NULL; - tor_asprintf(&buf, "%d="U64_FORMAT, i, U64_PRINTF_ARG(num)); - smartlist_add(read_strings, buf); - } - if (exit_streams[i] > 0) { - uint32_t num = round_uint32_to_next_multiple_of(exit_streams[i], - EXIT_STATS_ROUND_UP_STREAMS); - buf = NULL; - tor_asprintf(&buf, "%d=%u", i, num); - smartlist_add(streams_strings, buf); - } - } else { - other_read += exit_bytes_read[i]; - other_written += exit_bytes_written[i]; - other_streams += exit_streams[i]; + other_read = total_read; + other_written = total_written; + other_streams = total_streams; + /* Sort the ports; this puts them out of sync with top_bytes, but we + * won't be using top_bytes again anyway */ + qsort(top_ports, top_elements, sizeof(int), _compare_int); + for (j = 0; j < top_elements; j++) { + cur_port = top_ports[j]; + if (exit_bytes_written[cur_port] > 0) { + uint64_t num = round_uint64_to_next_multiple_of( + exit_bytes_written[cur_port], + EXIT_STATS_ROUND_UP_BYTES); + num /= 1024; + buf = NULL; + tor_asprintf(&buf, "%d="U64_FORMAT, cur_port, U64_PRINTF_ARG(num)); + smartlist_add(written_strings, buf); + other_written -= exit_bytes_written[cur_port]; + } + if (exit_bytes_read[cur_port] > 0) { + uint64_t num = round_uint64_to_next_multiple_of( + exit_bytes_read[cur_port], + EXIT_STATS_ROUND_UP_BYTES); + num /= 1024; + buf = NULL; + tor_asprintf(&buf, "%d="U64_FORMAT, cur_port, U64_PRINTF_ARG(num)); + smartlist_add(read_strings, buf); + other_read -= exit_bytes_read[cur_port]; + } + if (exit_streams[cur_port] > 0) { + uint32_t num = round_uint32_to_next_multiple_of( + exit_streams[cur_port], + EXIT_STATS_ROUND_UP_STREAMS); + buf = NULL; + tor_asprintf(&buf, "%d=%u", cur_port, num); + smartlist_add(streams_strings, buf); + other_streams -= exit_streams[cur_port]; } } + + /* Add observations of other ports in a single element. */ other_written = round_uint64_to_next_multiple_of(other_written, EXIT_STATS_ROUND_UP_BYTES); other_written /= 1024; @@ -2063,6 +2119,8 @@ rep_hist_format_exit_stats(time_t now) buf = NULL; tor_asprintf(&buf, "other=%u", other_streams); smartlist_add(streams_strings, buf); + + /* Join all observations in single strings. */ written_string = smartlist_join_strings(written_strings, ",", 0, NULL); read_string = smartlist_join_strings(read_strings, ",", 0, NULL); streams_string = smartlist_join_strings(streams_strings, ",", 0, NULL); diff --git a/src/test/test.c b/src/test/test.c index 3f014186a0..960a6659a7 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -1102,6 +1102,7 @@ test_stats(void) { time_t now = 1281533250; /* 2010-08-11 13:27:30 UTC */ char *s = NULL; + int i; /* We shouldn't collect exit stats without initializing them. */ rep_hist_note_exit_stream_opened(80); @@ -1124,6 +1125,22 @@ test_stats(void) "exit-streams-opened 80=4,443=4,other=0\n", s); tor_free(s); + /* Add a few bytes on 10 more ports and ensure that only the top 10 + * ports are contained in the history string. */ + for (i = 50; i < 60; i++) { + rep_hist_note_exit_bytes(i, i, i); + rep_hist_note_exit_stream_opened(i); + } + s = rep_hist_format_exit_stats(now + 86400); + test_streq("exit-stats-end 2010-08-12 13:27:30 (86400 s)\n" + "exit-kibibytes-written 52=1,53=1,54=1,55=1,56=1,57=1,58=1," + "59=1,80=1,443=1,other=1\n" + "exit-kibibytes-read 52=1,53=1,54=1,55=1,56=1,57=1,58=1," + "59=1,80=10,443=20,other=1\n" + "exit-streams-opened 52=4,53=4,54=4,55=4,56=4,57=4,58=4," + "59=4,80=4,443=4,other=4\n", s); + tor_free(s); + /* Stop collecting stats, add some bytes, and ensure we don't generate * a history string. */ rep_hist_exit_stats_term();