From 011b7324366ec9b4c5546b42cb702b440ab7c672 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Thu, 25 Jun 2009 17:03:39 -0700 Subject: [PATCH 1/4] Update 161 to reflect current implementation. Also mention rounding step. --- .../161-computing-bandwidth-adjustments.txt | 72 ++++++++----------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/doc/spec/proposals/161-computing-bandwidth-adjustments.txt b/doc/spec/proposals/161-computing-bandwidth-adjustments.txt index b02dc64eb2..a50dba687e 100644 --- a/doc/spec/proposals/161-computing-bandwidth-adjustments.txt +++ b/doc/spec/proposals/161-computing-bandwidth-adjustments.txt @@ -42,31 +42,26 @@ Status: Open slices of 50 nodes each, grouped according to advertised node bandwidth. Two hop circuits are built using nodes from the same slice, and a large - file is downloaded via these circuits. For nodes in the first 15% of the - network, a 500K file will be used. For nodes in the next 15%, a 250K file - will be used. For nodes in next 15%, a 100K file will be used. The - remainder of the nodes will fetch a 75K file.[1] + file is downloaded via these circuits. The file sizes are set based + on node percentile rank as follows: + + 0-10: 4M + 10-20: 2M + 20-30: 1M + 30-50: 512k + 50-75: 256k + 75-100: 128k - This process is repeated 250 times, and average stream capacities are - assigned to each node from these results. - - In the future, a node generator type can be created to ensure that - each node is chosen to participate in an equal number of circuits, - and the selection will continue until every live node is chosen - to participate in at least 7 circuits. - + These sizes are based on measurements performed during test scans. -4. Ratio Calculation Options + This process is repeated until each node has been chosen to participate + in at least 5 circuits. - There are two options for deriving the ratios themselves. 
They can - be obtained by dividing each nodes' average stream capacity by - either the average for the slice, or the average for the network as a - whole. - Dividing by the network-wide average has the advantage that it will - account for issues related to unbalancing between higher vs lower - capacity, such as Steven Murdoch's queuing theory weighting result. - For this reason, we will opt for network-wide averages. +4. Ratio Calculation + + The ratios are calculated by dividing each measured value by the + network-wide average. 5. Ratio Filtering @@ -77,10 +72,8 @@ Status: Open with capacity of one standard deviation below a node's average are also removed. - The final ratio result will be calculated as the maximum of - these two resulting ratios if both are less than 1.0, the minimum - if both are greater than 1.0, and the mean if one is greater - and one is less than 1.0. + The final ratio result will be the unfiltered ratio if it is + close to 1.0, otherwise it will be the filtered ratio. 6. Pseudocode for Ratio Calculation Algorithm @@ -109,12 +102,7 @@ Status: Open Bw_net_ratio(N) = Bw_measured(N)/Bw_net_avg(Slices) Bw_Norm_net_ratio(N) = Bw_measured2(N)/Bw_Norm_net_avg(Slices) - if Bw_net_ratio(N) < 1.0 and Bw_Norm_net_ratio(N) < 1.0: - ResultRatio(N) = MAX(Bw_net_ratio(N), Bw_Norm_net_ratio(N)) - else if Bw_net_ratio(N) > 1.0 and Bw_Norm_net_ratio(N) > 1.0: - ResultRatio(N) = MIN(Bw_net_ratio(N), Bw_Norm_net_ratio(N)) - else: - ResultRatio(N) = MEAN(Bw_net_ratio(N), Bw_Norm_net_ratio(N)) + ResultRatio(N) = ClosestToOne(Bw_net_ratio(N), Bw_Norm_net_ratio(N)) 7. Security implications @@ -126,14 +114,14 @@ Status: Open This scheme will not address nodes that try to game the system by providing better service to scanners. The scanners can be detected - at the entry by IP address, and at the exit by the destination fetch. + at the entry by IP address, and at the exit by the destination fetch + IP. 
Measures can be taken to obfuscate and separate the scanners' source IP address from the directory authority IP address. For instance, scans can happen offsite and the results can be rsynced into the - authorities. The destination fetch can also be obscured by using SSL - and periodically changing the large document that is fetched. - + authorities. The destination server IP can also change. + Neither of these methods are foolproof, but such nodes can already lie about their bandwidth to attract more traffic, so this solution does not set us back any in that regard. @@ -148,7 +136,7 @@ Status: Open over a portion of the network, outputting files of the form: node_id= SP strm_bw= SP - filt_bw= NL + filt_bw= ns_bw= NL The most recent file from each scanner will be periodically gathered by another script that uses them to produce network-wide averages @@ -166,10 +154,15 @@ Status: Open scan, and taking the weighted average with the previous consensus bandwidth: - Bw_new = (Bw_current * Alpha + Bw_scan_avg*Bw_ratio)/(Alpha + 1) + Bw_new = Round((Bw_current * Alpha + Bw_scan_avg*Bw_ratio)/(Alpha + 1)) The Alpha parameter is a smoothing parameter intended to prevent - rapid oscillation between loaded and unloaded conditions. + rapid oscillation between loaded and unloaded conditions. It is + currently fixed at 0.333. + + The Round() step consists of rounding to the 3 most significant figures + in base 10, and then rounding that result to the nearest 1000, with + a minimum value of 1000. This will produce a new bandwidth value that will be output into a file consisting of lines of the form: @@ -183,6 +176,3 @@ Status: Open This file can be either copied or rsynced into a directory readable by the directory authority. - -1. Exact values for each segment are still being determined via -test scans. 
From 3a1b9526f89986aedf2cb9bfdf0874804935179f Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 14 Jul 2009 20:47:51 -0700 Subject: [PATCH 2/4] Update the ratio calculation to prefer faster measurements. Also: simplify complicated filtering steps, always take the most recent measurement, and use slightly smaller file sizes based on measurements. --- .../161-computing-bandwidth-adjustments.txt | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/doc/spec/proposals/161-computing-bandwidth-adjustments.txt b/doc/spec/proposals/161-computing-bandwidth-adjustments.txt index a50dba687e..786e1afebd 100644 --- a/doc/spec/proposals/161-computing-bandwidth-adjustments.txt +++ b/doc/spec/proposals/161-computing-bandwidth-adjustments.txt @@ -45,12 +45,11 @@ Status: Open file is downloaded via these circuits. The file sizes are set based on node percentile rank as follows: - 0-10: 4M - 10-20: 2M - 20-30: 1M - 30-50: 512k - 50-75: 256k - 75-100: 128k + 0-10: 2M + 10-20: 1M + 20-30: 512k + 30-50: 256k + 50-100: 128k These sizes are based on measurements performed during test scans. @@ -72,8 +71,8 @@ Status: Open with capacity of one standard deviation below a node's average are also removed. - The final ratio result will be the unfiltered ratio if it is - close to 1.0, otherwise it will be the filtered ratio. + The final ratio result will be the greater of the unfiltered ratio + and the filtered ratio. 6. 
Pseudocode for Ratio Calculation Algorithm @@ -88,11 +87,8 @@ Status: Open BW_measured(N) = MEAN(b | b is bandwidth of a stream through N) Bw_stddev(N) = STDDEV(b | b is bandwidth of a stream through N) Bw_avg(S) = MEAN(b | b = BW_measured(N) for all N in S) - Normal_Routers(S) = {N | Bw_measured(N)/Bw_avg(S) > 0.5 } for N in S: - Normal_Streams(N) = - {stream via N | all nodes in stream not in {Normal_Routers(S)-N} - and bandwidth > BW_measured(N)-Bw_stddev(N)} + Normal_Streams(N) = {stream via N | bandwidth >= BW_measured(N)} BW_Norm_measured(N) = MEAN(b | b is a bandwidth of Normal_Streams(N)) Bw_net_avg(Slices) = MEAN(BW_measured(N) for all N in Slices) @@ -100,9 +96,9 @@ Status: Open for N in all Slices: Bw_net_ratio(N) = Bw_measured(N)/Bw_net_avg(Slices) - Bw_Norm_net_ratio(N) = Bw_measured2(N)/Bw_Norm_net_avg(Slices) + Bw_Norm_net_ratio(N) = BW_Norm_measured(N)/Bw_Norm_net_avg(Slices) - ResultRatio(N) = ClosestToOne(Bw_net_ratio(N), Bw_Norm_net_ratio(N)) + ResultRatio(N) = MAX(Bw_net_ratio(N), Bw_Norm_net_ratio(N)) 7. Security implications @@ -142,8 +138,8 @@ Status: Open by another script that uses them to produce network-wide averages and calculate ratios as per the algorithm in section 6. Because nodes may shift in capacity, they may appear in more than one slice and/or - appear more than once in the file set. The line that yields a ratio - closest to 1.0 will be chosen in this case. + appear more than once in the file set. The most recently measured + line will be chosen in this case. 9. 
Integration with Proposal 160 From 8196130f245ea4ccd893ad9b146a63f3fa5e68c1 Mon Sep 17 00:00:00 2001 From: Roger Dingledine Date: Fri, 7 Aug 2009 17:55:17 -0400 Subject: [PATCH 3/4] fix typo in control-spec --- doc/spec/control-spec.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/spec/control-spec.txt b/doc/spec/control-spec.txt index 20952996b2..fc4242ea16 100644 --- a/doc/spec/control-spec.txt +++ b/doc/spec/control-spec.txt @@ -773,7 +773,7 @@ Same as passing 'EXTENDED' to SETEVENTS; this is the preferred way to request the extended event syntax. - This feaure was first used in 0.1.2.3-alpha. It is always-on in + This feature was first used in 0.1.2.3-alpha. It is always-on in Tor 0.2.2.1-alpha and later. VERBOSE_NAMES From 3886467f386732598647a2d3209777ba8d8d7baa Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Sun, 9 Aug 2009 17:27:35 -0700 Subject: [PATCH 4/4] Add a new tor_strtok_r for platforms that don't have one, plus tests. I don't think we actually use (or plan to use) strtok_r in a reentrant way anywhere in our code, but would be nice not to have to think about whether we're doing it. --- src/common/compat.c | 31 +++++++++++++++++++++++++++++++ src/common/compat.h | 7 +++++++ src/or/test.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/src/common/compat.c b/src/common/compat.c index d62b1ce1f4..29425c2492 100644 --- a/src/common/compat.c +++ b/src/common/compat.c @@ -398,6 +398,37 @@ const char TOR_TOLOWER_TABLE[256] = { 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, }; +/** Implementation of strtok_r for platforms whose coders haven't figured out + * how to write one. Hey guys! You can use this code here for free! 
*/ +char * +tor_strtok_r_impl(char *str, const char *sep, char **lasts) +{ + char *cp, *start; + if (str) + start = cp = *lasts = str; + else if (!*lasts) + return NULL; + else + start = cp = *lasts; + + tor_assert(*sep); + if (sep[1]) { + while (*cp && !strchr(sep, *cp)) + ++cp; + } else { + tor_assert(strlen(sep) == 1); + cp = strchr(cp, *sep); + } + + if (!cp || !*cp) { + *lasts = NULL; + } else { + *cp++ = '\0'; + *lasts = cp; + } + return start; +} + #ifdef MS_WINDOWS /** Take a filename and return a pointer to its final element. This * function is called on __FILE__ to fix a MSVC nit where __FILE__ diff --git a/src/common/compat.h b/src/common/compat.h index 4d5a016cf2..3d429486e8 100644 --- a/src/common/compat.h +++ b/src/common/compat.h @@ -267,6 +267,13 @@ extern const char TOR_TOLOWER_TABLE[]; #define TOR_TOLOWER(c) (TOR_TOLOWER_TABLE[(uint8_t)c]) #define TOR_TOUPPER(c) (TOR_TOUPPER_TABLE[(uint8_t)c]) +char *tor_strtok_r_impl(char *str, const char *sep, char **lasts); +#ifdef HAVE_STRTOK_R +#define tor_strtok_r(str, sep, lasts) strtok_r(str, sep, lasts) +#else +#define tor_strtok_r(str, sep, lasts) tor_strtok_r_impl(str, sep, lasts) +#endif + #ifdef MS_WINDOWS #define _SHORT_FILE_ (tor_fix_source_file(__FILE__)) const char *tor_fix_source_file(const char *fname); diff --git a/src/or/test.c b/src/or/test.c index 3103eed828..67a9c381fe 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -4284,6 +4284,39 @@ test_util_datadir(void) tor_free(f); } +static void +test_util_strtok(void) +{ + char buf[128]; + char buf2[128]; + char *cp1, *cp2; + strlcpy(buf, "Graved on the dark in gestures of descent", sizeof(buf)); + strlcpy(buf2, "they.seemed;their!own;most.perfect;monument", sizeof(buf2)); + /* -- "Year's End", Richard Wilbur */ + + test_streq("Graved", tor_strtok_r_impl(buf, " ", &cp1)); + test_streq("they", tor_strtok_r_impl(buf2, ".!..;!", &cp2)); +#define S1() tor_strtok_r_impl(NULL, " ", &cp1) +#define S2() tor_strtok_r_impl(NULL, ".!..;!", &cp2) + 
test_streq("on", S1()); + test_streq("the", S1()); + test_streq("dark", S1()); + test_streq("seemed", S2()); + test_streq("their", S2()); + test_streq("own", S2()); + test_streq("in", S1()); + test_streq("gestures", S1()); + test_streq("of", S1()); + test_streq("most", S2()); + test_streq("perfect", S2()); + test_streq("descent", S1()); + test_streq("monument", S2()); + test_assert(NULL == S1()); + test_assert(NULL == S2()); + done: + ; +} + /** Test AES-CTR encryption and decryption with IV. */ static void test_crypto_aes_iv(void) @@ -4692,6 +4725,7 @@ static struct { SUBENT(util, threads), SUBENT(util, order_functions), SUBENT(util, sscanf), + SUBENT(util, strtok), ENT(onion_handshake), ENT(dir_format), ENT(dirutil),