diff --git a/src/common/compress.c b/src/common/compress.c index 9a24025db0..8502dee25c 100644 --- a/src/common/compress.c +++ b/src/common/compress.c @@ -285,6 +285,26 @@ tor_compress_supports_method(compress_method_t method) } } +/** + * Return a bitmask of the supported compression types, where 1<<m is + * set in the bitmask if and only if compression with method m is + * supported. + */ +unsigned +tor_compress_get_supported_method_bitmask(void) +{ + static unsigned supported = 0; + if (supported == 0) { + compress_method_t m; + for (m = NO_METHOD; m <= UNKNOWN_METHOD; ++m) { + if (tor_compress_supports_method(m)) { + supported |= (1u << m); + } + } + } + return supported; +} + /** Table of compression method names. These should have an "x-" prefix, * if they are not listed in the IANA content coding registry. */ static const struct { diff --git a/src/common/compress.h b/src/common/compress.h index 95b70c02ec..5b47c5d458 100644 --- a/src/common/compress.h +++ b/src/common/compress.h @@ -16,12 +16,12 @@ * functions here. Call tor_compress_supports_method() to check if a given * compression schema is supported by Tor. */ typedef enum { - NO_METHOD=0, + NO_METHOD=0, // This method must be first. GZIP_METHOD=1, ZLIB_METHOD=2, LZMA_METHOD=3, ZSTD_METHOD=4, - UNKNOWN_METHOD=5 + UNKNOWN_METHOD=5, // This method must be last. Add new ones in the middle. } compress_method_t; /** @@ -48,6 +48,7 @@ compress_method_t detect_compression_method(const char *in, size_t in_len); int tor_compress_is_compression_bomb(size_t size_in, size_t size_out); int tor_compress_supports_method(compress_method_t method); +unsigned tor_compress_get_supported_method_bitmask(void); const char * compression_method_get_name(compress_method_t method); compress_method_t compression_method_get_by_name(const char *name); diff --git a/src/or/directory.c b/src/or/directory.c index 751d46910e..e53bc78a35 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -2928,6 +2928,31 @@ write_http_response_header(dir_connection_t *conn, ssize_t length, cache_lifetime); } +/** Parse the compression methods listed in an Accept-Encoding header h, + * and convert them to a bitfield where compression method x is supported if + * and only if 1 << x is set in the bitfield. */ +STATIC unsigned +parse_accept_encoding_header(const char *h) +{ + unsigned result = (1u << NO_METHOD); + smartlist_t *methods = smartlist_new(); + smartlist_split_string(methods, h, ",", + SPLIT_SKIP_SPACE|SPLIT_STRIP_SPACE|SPLIT_IGNORE_BLANK, 0); + + SMARTLIST_FOREACH_BEGIN(methods, const char *, m) { + compress_method_t method = compression_method_get_by_name(m); + if (method != UNKNOWN_METHOD) { + tor_assert(method < 8*sizeof(unsigned)); + result |= (1u << method); + } + } SMARTLIST_FOREACH_END(m); + SMARTLIST_FOREACH_BEGIN(methods, char *, m) { + tor_free(m); + } SMARTLIST_FOREACH_END(m); + smartlist_free(methods); + return result; +} + /** Decide whether a client would accept the consensus we have. * * Clients can say they only want a consensus if it's signed by more @@ -3002,8 +3027,9 @@ choose_compression_level(ssize_t n_bytes) /** Information passed to handle a GET request. */ typedef struct get_handler_args_t { - /** True if the client asked for compressed data. */ - int compressed; + /** Bitmask of compression methods that the client said (or implied) it + * supported. */ + unsigned compression_supported; /** If nonzero, the time included an if-modified-since header with this * value. */ time_t if_modified_since; @@ -3077,8 +3103,9 @@ directory_handle_command_get,(dir_connection_t *conn, const char *headers, { char *url, *url_mem, *header; time_t if_modified_since = 0; - int compressed; + int zlib_compressed_in_url; size_t url_len; + unsigned compression_methods_supported; /* We ignore the body of a GET request. */ (void)req_body; @@ -3109,17 +3136,30 @@ directory_handle_command_get,(dir_connection_t *conn, const char *headers, url_mem = url; url_len = strlen(url); - compressed = url_len > 2 && !strcmp(url+url_len-2, ".z"); - if (compressed) { + + zlib_compressed_in_url = url_len > 2 && !strcmp(url+url_len-2, ".z"); + if (zlib_compressed_in_url) { url[url_len-2] = '\0'; url_len -= 2; } + if ((header = http_get_header(headers, "Accept-Encoding"))) { + compression_methods_supported = parse_accept_encoding_header(header); + tor_free(header); + } else { + compression_methods_supported = (1u << NO_METHOD); + if (zlib_compressed_in_url) + compression_methods_supported |= (1u << ZLIB_METHOD); + } + + /* Remove all methods that we don't both support. */ + compression_methods_supported &= tor_compress_get_supported_method_bitmask(); + get_handler_args_t args; args.url = url; args.headers = headers; args.if_modified_since = if_modified_since; - args.compressed = compressed; + args.compression_supported = compression_methods_supported; int i, result = -1; for (i = 0; url_table[i].string; ++i) { @@ -3198,7 +3238,7 @@ handle_get_current_consensus(dir_connection_t *conn, const get_handler_args_t *args) { const char *url = args->url; - const int compressed = args->compressed; + const int compressed = args->compression_supported & (1u << ZLIB_METHOD); const time_t if_modified_since = args->if_modified_since; int clear_spool = 0; @@ -3339,7 +3379,7 @@ static int handle_get_status_vote(dir_connection_t *conn, const get_handler_args_t *args) { const char *url = args->url; - const int compressed = args->compressed; + const int compressed = args->compression_supported & (1u << ZLIB_METHOD); { int current; ssize_t body_len = 0; @@ -3446,7 +3486,7 @@ static int handle_get_microdesc(dir_connection_t *conn, const get_handler_args_t *args) { const char *url = args->url; - const int compressed = args->compressed; + const int compressed = args->compression_supported & (1u << ZLIB_METHOD); int clear_spool = 1; { conn->spool = smartlist_new(); @@ -3496,7 +3536,7 @@ static int handle_get_descriptor(dir_connection_t *conn, const get_handler_args_t *args) { const char *url = args->url; - const int compressed = args->compressed; + const int compressed = args->compression_supported & (1u << ZLIB_METHOD); const or_options_t *options = get_options(); int clear_spool = 1; if (!strcmpstart(url,"/tor/server/") || @@ -3589,7 +3629,7 @@ static int handle_get_keys(dir_connection_t *conn, const get_handler_args_t *args) { const char *url = args->url; - const int compressed = args->compressed; + const int compressed = args->compression_supported & (1u << ZLIB_METHOD); const time_t if_modified_since = args->if_modified_since; { smartlist_t *certs = smartlist_new(); diff --git a/src/or/directory.h b/src/or/directory.h index e4fbbe6891..8473d39e85 100644 --- a/src/or/directory.h +++ b/src/or/directory.h @@ -196,6 +196,7 @@ STATIC int next_random_exponential_delay(int delay, int max_delay); STATIC int parse_hs_version_from_post(const char *url, const char *prefix, const char **end_pos); +STATIC unsigned parse_accept_encoding_header(const char *h); #endif #endif diff --git a/src/test/test_dir_handle_get.c b/src/test/test_dir_handle_get.c index 392fa4dde0..7794bd7d65 100644 --- a/src/test/test_dir_handle_get.c +++ b/src/test/test_dir_handle_get.c @@ -2497,6 +2497,52 @@ test_dir_handle_get_status_vote_current_authority(void* data) dirvote_free_all(); } +static void +test_dir_handle_get_parse_accept_encoding(void *arg) +{ + (void)arg; + const unsigned B_NONE = 1u << NO_METHOD; + const unsigned B_ZLIB = 1u << ZLIB_METHOD; + const unsigned B_GZIP = 1u << GZIP_METHOD; + const unsigned B_LZMA = 1u << LZMA_METHOD; + const unsigned B_ZSTD = 1u << ZSTD_METHOD; + + unsigned encodings; + + encodings = parse_accept_encoding_header(""); + tt_uint_op(B_NONE, OP_EQ, encodings); + + encodings = parse_accept_encoding_header(" "); + tt_uint_op(B_NONE, OP_EQ, encodings); + + encodings = parse_accept_encoding_header("dewey, cheatham, and howe "); + tt_uint_op(B_NONE, OP_EQ, encodings); + + encodings = parse_accept_encoding_header("dewey, cheatham, and gzip"); + tt_uint_op(B_NONE, OP_EQ, encodings); + + encodings = parse_accept_encoding_header("dewey, cheatham, and, gzip"); + tt_uint_op(B_NONE|B_GZIP, OP_EQ, encodings); + + encodings = parse_accept_encoding_header(" gzip"); + tt_uint_op(B_NONE|B_GZIP, OP_EQ, encodings); + + encodings = parse_accept_encoding_header("gzip"); + tt_uint_op(B_NONE|B_GZIP, OP_EQ, encodings); + + encodings = parse_accept_encoding_header("x-zstd, deflate, x-lzma"); + tt_uint_op(B_NONE|B_ZLIB|B_ZSTD|B_LZMA, OP_EQ, encodings); + + encodings = parse_accept_encoding_header("x-zstd, deflate, x-lzma, gzip"); + tt_uint_op(B_NONE|B_ZLIB|B_ZSTD|B_LZMA|B_GZIP, OP_EQ, encodings); + + encodings = parse_accept_encoding_header("x-zstd,deflate,x-lzma,gzip"); + tt_uint_op(B_NONE|B_ZLIB|B_ZSTD|B_LZMA|B_GZIP, OP_EQ, encodings); + + done: + ; +} + #define DIR_HANDLE_CMD(name,flags) \ { #name, test_dir_handle_get_##name, (flags), NULL, NULL } @@ -2555,6 +2601,7 @@ struct testcase_t dir_handle_get_tests[] = { DIR_HANDLE_CMD(status_vote_next_consensus_signatures_not_found, 0), DIR_HANDLE_CMD(status_vote_next_consensus_signatures_busy, 0), DIR_HANDLE_CMD(status_vote_next_consensus_signatures, 0), + DIR_HANDLE_CMD(parse_accept_encoding, 0), END_OF_TESTCASES };