Merge branch 'unified_compress_squashed'

This commit is contained in:
Nick Mathewson 2017-04-25 10:51:13 -04:00
commit 49868340f7
9 changed files with 160 additions and 898 deletions

View File

@ -56,6 +56,147 @@ tor_compress_is_compression_bomb(size_t size_in, size_t size_out)
return (size_out / size_in > MAX_UNCOMPRESSION_FACTOR); return (size_out / size_in > MAX_UNCOMPRESSION_FACTOR);
} }
/** Estimate how many bytes <b>in_len</b> bytes of input will take up once
 * they have been compressed (if <b>compress</b> is true) or decompressed
 * (otherwise).
 *
 * The estimate is a flat factor of two in either direction, and is never
 * smaller than 1024.  <b>method</b> and <b>compression_level</b> are accepted
 * but not consulted. */
static size_t
guess_compress_size(int compress, compress_method_t method,
                    compression_level_t compression_level,
                    size_t in_len)
{
  size_t guess = in_len;

  /* Unused for now: the same guess is made for every method and level. */
  (void)compression_level;
  (void)method;

  if (compress)
    guess = in_len / 2;
  else if (in_len < SIZE_T_CEILING/2)
    guess = in_len * 2;
  /* Otherwise the input is too large to double, and its own length is the
   * guess. */

  if (guess < 1024)
    guess = 1024;
  return guess;
}
/** Internal function to implement tor_compress/tor_uncompress, depending on
 * whether <b>compress</b> is set.  All arguments are as for tor_compress or
 * tor_uncompress.
 *
 * The whole input is pushed through an incremental tor_compress_state_t, and
 * the output buffer is doubled each time it fills up.
 *
 * Return 0 on success, with a newly allocated buffer in *<b>out</b> and its
 * length in *<b>out_len</b>; decompressed output is also NUL-terminated, and
 * the terminator is not counted in *<b>out_len</b>.  Return -1 on failure,
 * with *<b>out</b> set to NULL and *<b>out_len</b> set to 0. */
static int
tor_compress_impl(int compress,
                  char **out, size_t *out_len,
                  const char *in, size_t in_len,
                  compress_method_t method,
                  compression_level_t compression_level,
                  int complete_only,
                  int protocol_warn_level)
{
  tor_compress_state_t *stream;
  int rv;

  /* Define the outputs before anything can fail, so that a caller who frees
   * *out after an error never sees an uninitialized pointer. */
  *out = NULL;
  *out_len = 0;

  stream = tor_compress_new(compress, method, compression_level);
  if (stream == NULL)
    return -1;

  size_t in_len_orig = in_len;
  size_t out_remaining, out_alloc;
  char *outptr;

  out_remaining = out_alloc =
    guess_compress_size(compress, method, compression_level, in_len);
  *out = outptr = tor_malloc(out_remaining);

  /* Compression always asks the backend to finish the stream; decompression
   * only does so when the caller insists on complete input. */
  const int finish = complete_only || compress;

  while (1) {
    switch (tor_compress_process(stream,
                                 &outptr, &out_remaining,
                                 &in, &in_len, finish)) {
      case TOR_COMPRESS_DONE:
        if (in_len == 0 || compress) {
          goto done;
        } else {
          // More data is present, and we're decompressing.  So we may need to
          // reinitialize the stream if we are handling multiple concatenated
          // inputs.
          tor_compress_free(stream);
          stream = tor_compress_new(compress, method, compression_level);
          if (stream == NULL) {
            // Without a stream we must not go around the loop again.
            log_warn(LD_GENERAL, "Unable to reinitialize stream while "
                     "uncompressing concatenated data.");
            goto err;
          }
        }
        break;
      case TOR_COMPRESS_OK:
        // The stream was not reported as finished.  That is a failure when
        // compressing or when complete input is required; otherwise we keep
        // whatever output we have.
        if (compress || complete_only) {
          goto err;
        } else {
          goto done;
        }
        break;
      case TOR_COMPRESS_BUFFER_FULL: {
        if (!compress && outptr < *out+out_alloc) {
          // A buffer error in this case means that we have a problem
          // with our input.
          log_fn(protocol_warn_level, LD_PROTOCOL,
                 "Possible truncated or corrupt compressed data");
          goto err;
        }
        if (out_alloc >= SIZE_T_CEILING / 2) {
          log_warn(LD_GENERAL, "While %scompressing data: ran out of space.",
                   compress?"":"un");
          goto err;
        }
        if (!compress &&
            tor_compress_is_compression_bomb(in_len_orig, out_alloc)) {
          // This should already have been caught down in the backend logic.
          // LCOV_EXCL_START
          tor_assert_nonfatal_unreached();
          goto err;
          // LCOV_EXCL_STOP
        }
        // Keep the write position as an offset: tor_realloc may move *out.
        const size_t offset = outptr - *out;
        out_alloc *= 2;
        *out = tor_realloc(*out, out_alloc);
        outptr = *out + offset;
        out_remaining = out_alloc - offset;
        break;
      }
      case TOR_COMPRESS_ERROR:
        log_fn(protocol_warn_level, LD_GENERAL,
               "Error while %scompressing data: bad input?",
               compress?"":"un");
        goto err; // bad data.
      default:
        // LCOV_EXCL_START
        tor_assert_nonfatal_unreached();
        goto err;
        // LCOV_EXCL_STOP
    }
  }

 done:
  *out_len = outptr - *out;
  // Apply the bomb test the way a decompressing peer would see it: our
  // output is its input, and our input is its output.
  if (compress && tor_compress_is_compression_bomb(*out_len, in_len_orig)) {
    log_warn(LD_BUG, "We compressed something and got an insanely high "
             "compression factor; other Tors would think this was a "
             "compression bomb.");
    goto err;
  }
  if (!compress) {
    // NUL-terminate our output.
    if (out_alloc == *out_len)
      *out = tor_realloc(*out, out_alloc + 1);
    (*out)[*out_len] = '\0';
  }
  rv = 0;
  goto out;

 err:
  tor_free(*out);
  *out = NULL;
  *out_len = 0;
  rv = -1;
  goto out;

 out:
  /* stream is NULL here only when re-creating it failed above. */
  if (stream != NULL)
    tor_compress_free(stream);
  return rv;
}
/** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly /** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
* allocated buffer, using the method described in <b>method</b>. Store the * allocated buffer, using the method described in <b>method</b>. Store the
* compressed string in *<b>out</b>, and its length in *<b>out_len</b>. * compressed string in *<b>out</b>, and its length in *<b>out_len</b>.
@ -66,19 +207,9 @@ tor_compress(char **out, size_t *out_len,
const char *in, size_t in_len, const char *in, size_t in_len,
compress_method_t method) compress_method_t method)
{ {
switch (method) { return tor_compress_impl(1, out, out_len, in, in_len, method,
case GZIP_METHOD: BEST_COMPRESSION,
case ZLIB_METHOD: 1, LOG_WARN);
return tor_zlib_compress(out, out_len, in, in_len, method);
case LZMA_METHOD:
return tor_lzma_compress(out, out_len, in, in_len, method);
case ZSTD_METHOD:
return tor_zstd_compress(out, out_len, in, in_len, method);
case NO_METHOD:
case UNKNOWN_METHOD:
default:
return -1;
}
} }
/** Given zero or more zlib-compressed or gzip-compressed strings of /** Given zero or more zlib-compressed or gzip-compressed strings of
@ -99,28 +230,9 @@ tor_uncompress(char **out, size_t *out_len,
int complete_only, int complete_only,
int protocol_warn_level) int protocol_warn_level)
{ {
switch (method) { return tor_compress_impl(0, out, out_len, in, in_len, method,
case GZIP_METHOD: BEST_COMPRESSION,
case ZLIB_METHOD: complete_only, protocol_warn_level);
return tor_zlib_uncompress(out, out_len, in, in_len,
method,
complete_only,
protocol_warn_level);
case LZMA_METHOD:
return tor_lzma_uncompress(out, out_len, in, in_len,
method,
complete_only,
protocol_warn_level);
case ZSTD_METHOD:
return tor_zstd_uncompress(out, out_len, in, in_len,
method,
complete_only,
protocol_warn_level);
case NO_METHOD:
case UNKNOWN_METHOD:
default:
return -1;
}
} }
/** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely /** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely

View File

@ -26,11 +26,11 @@ typedef enum {
/** /**
* Enumeration to define tradeoffs between memory usage and compression level. * Enumeration to define tradeoffs between memory usage and compression level.
* HIGH_COMPRESSION saves the most bandwidth; LOW_COMPRESSION saves the most * BEST_COMPRESSION saves the most bandwidth; LOW_COMPRESSION saves the most
* memory. * memory.
**/ **/
typedef enum { typedef enum {
HIGH_COMPRESSION, MEDIUM_COMPRESSION, LOW_COMPRESSION BEST_COMPRESSION, HIGH_COMPRESSION, MEDIUM_COMPRESSION, LOW_COMPRESSION
} compression_level_t; } compression_level_t;
int tor_compress(char **out, size_t *out_len, int tor_compress(char **out, size_t *out_len,

View File

@ -32,6 +32,7 @@ memory_level(compression_level_t level)
{ {
switch (level) { switch (level) {
default: default:
case BEST_COMPRESSION:
case HIGH_COMPRESSION: return 9; case HIGH_COMPRESSION: return 9;
case MEDIUM_COMPRESSION: return 6; case MEDIUM_COMPRESSION: return 6;
case LOW_COMPRESSION: return 3; case LOW_COMPRESSION: return 3;
@ -108,296 +109,6 @@ tor_lzma_get_header_version_str(void)
#endif #endif
} }
/** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
 * allocated buffer, using the LZMA method.  Store the compressed string in
 * *<b>out</b>, and its length in *<b>out_len</b>.  Return 0 on success, -1 on
 * failure.
 *
 * <b>method</b> must be LZMA_METHOD and <b>in_len</b> must be below UINT_MAX;
 * both are asserted.  When built without HAVE_LZMA this function always
 * returns -1.
 */
int
tor_lzma_compress(char **out, size_t *out_len,
                  const char *in, size_t in_len,
                  compress_method_t method)
{
#ifdef HAVE_LZMA
  lzma_stream stream = LZMA_STREAM_INIT;
  lzma_options_lzma stream_options;
  lzma_ret retval;
  lzma_action action;
  size_t out_size, old_size;
  off_t offset;

  tor_assert(out);
  tor_assert(out_len);
  tor_assert(in);
  tor_assert(in_len < UINT_MAX);
  tor_assert(method == LZMA_METHOD);

  /* The error path frees *out and can be reached before the output buffer
   * has been allocated, so start from a known value. */
  *out = NULL;

  stream.next_in = (unsigned char *)in;
  stream.avail_in = in_len;

  lzma_lzma_preset(&stream_options,
                   memory_level(HIGH_COMPRESSION));

  retval = lzma_alone_encoder(&stream, &stream_options);

  if (retval != LZMA_OK) {
    log_warn(LD_GENERAL, "Error from LZMA encoder: %s (%u).",
             lzma_error_str(retval), retval);
    goto err;
  }

  /* Guess 50% compression, with a floor of 1024 bytes. */
  out_size = in_len / 2;
  if (out_size < 1024)
    out_size = 1024;

  *out = tor_malloc(out_size);

  stream.next_out = (unsigned char *)*out;
  stream.avail_out = out_size;

  action = LZMA_RUN;

  while (1) {
    retval = lzma_code(&stream, action);
    switch (retval) {
      case LZMA_OK:
        /* Every later call asks the encoder to finish the stream. */
        action = LZMA_FINISH;
        break;
      case LZMA_STREAM_END:
        goto done;
      case LZMA_BUF_ERROR:
        /* Grow the output buffer.  Keep the write position as an offset,
         * since tor_realloc may move *out. */
        offset = stream.next_out - ((unsigned char *)*out);
        old_size = out_size;
        out_size *= 2;
        if (out_size < old_size) {
          log_warn(LD_GENERAL, "Size overflow in LZMA compression.");
          goto err;
        }
        *out = tor_realloc(*out, out_size);
        stream.next_out = (unsigned char *)(*out + offset);
        if (out_size - offset > UINT_MAX) {
          log_warn(LD_BUG, "Ran over unsigned int limit of LZMA while "
                   "compressing.");
          goto err;
        }
        stream.avail_out = (unsigned int)(out_size - offset);
        break;

      // We list all the possible values of `lzma_ret` here to silence the
      // `switch-enum` warning and to detect if a new member was added.
      case LZMA_NO_CHECK:
      case LZMA_UNSUPPORTED_CHECK:
      case LZMA_GET_CHECK:
      case LZMA_MEM_ERROR:
      case LZMA_MEMLIMIT_ERROR:
      case LZMA_FORMAT_ERROR:
      case LZMA_OPTIONS_ERROR:
      case LZMA_DATA_ERROR:
      case LZMA_PROG_ERROR:
      default:
        log_warn(LD_GENERAL, "LZMA compression didn't finish: %s.",
                 lzma_error_str(retval));
        goto err;
    }
  }

 done:
  *out_len = stream.total_out;
  lzma_end(&stream);

  if (tor_compress_is_compression_bomb(*out_len, in_len)) {
    log_warn(LD_BUG, "We compressed something and got an insanely high "
             "compression factor; other Tor instances would think "
             "this is a compression bomb.");
    goto err;
  }

  return 0;

 err:
  lzma_end(&stream);
  tor_free(*out);
  return -1;
#else // HAVE_LZMA.
  (void)out;
  (void)out_len;
  (void)in;
  (void)in_len;
  (void)method;

  return -1;
#endif // HAVE_LZMA.
}
/** Given an LZMA compressed string of total length <b>in_len</b> bytes at
 * <b>in</b>, uncompress them into a newly allocated buffer.  Store the
 * uncompressed string in *<b>out</b>, and its length in *<b>out_len</b>.
 * Return 0 on success, -1 on failure.
 *
 * The output is NUL-terminated; the terminator is not counted in
 * *<b>out_len</b>.  Several LZMA streams concatenated in <b>in</b> are
 * uncompressed one after another into the same buffer.
 *
 * If <b>complete_only</b> is true, we consider a truncated input as a failure;
 * otherwise we decompress as much as we can.  (Right now the argument is
 * ignored and the decoder is always asked to finish; see the FIXME below.)
 * Warn about truncated or corrupt inputs at <b>protocol_warn_level</b>.
 *
 * When built without HAVE_LZMA this function always returns -1.
 */
int
tor_lzma_uncompress(char **out, size_t *out_len,
                    const char *in, size_t in_len,
                    compress_method_t method,
                    int complete_only,
                    int protocol_warn_level)
{
#ifdef HAVE_LZMA
  lzma_stream stream = LZMA_STREAM_INIT;
  lzma_ret retval;
  lzma_action action;
  size_t out_size, old_size;
  off_t offset;

  tor_assert(out);
  tor_assert(out_len);
  tor_assert(in);
  tor_assert(in_len < UINT_MAX);
  tor_assert(method == LZMA_METHOD);

  /* The error path frees *out and can be reached before the output buffer
   * has been allocated, so start from a known value. */
  *out = NULL;

  stream.next_in = (unsigned char *)in;
  stream.avail_in = in_len;

  // FIXME(ahf): This should be something more sensible than
  // UINT64_MAX: See #21665.
  retval = lzma_alone_decoder(&stream, UINT64_MAX);

  if (retval != LZMA_OK) {
    log_warn(LD_GENERAL, "Error from LZMA decoder: %s (%u).",
             lzma_error_str(retval), retval);
    goto err;
  }

  /* Guess that the data expands by a factor of two, with a floor of 1024
   * bytes. */
  out_size = in_len * 2;
  if (out_size < 1024)
    out_size = 1024;

  if (out_size >= SIZE_T_CEILING || out_size > UINT_MAX)
    goto err;

  *out = tor_malloc(out_size);
  stream.next_out = (unsigned char *)*out;
  stream.avail_out = out_size;

  // FIXME(ahf): We should figure out how to use LZMA_FULL_FLUSH to
  // make the partial string read tests.
  //   action = complete_only ? LZMA_FINISH : LZMA_SYNC_FLUSH.  // To do this,
  // it seems like we have to use LZMA using their "xz" encoder instead of just
  // regular LZMA.
  (void)complete_only;
  action = LZMA_FINISH;

  while (1) {
    retval = lzma_code(&stream, action);
    switch (retval) {
      case LZMA_STREAM_END:
        if (stream.avail_in == 0)
          goto done;

        // We might have more data here. Reset our stream.
        lzma_end(&stream);

        retval = lzma_alone_decoder(&stream, UINT64_MAX);

        if (retval != LZMA_OK) {
          log_warn(LD_GENERAL, "Error from LZMA decoder: %s (%u).",
                   lzma_error_str(retval), retval);
          goto err;
        }
        break;
      case LZMA_OK:
        break;
      case LZMA_BUF_ERROR:
        // With output space still available, the stall was not caused by a
        // full buffer, so blame the input.
        if (stream.avail_out > 0) {
          log_fn(protocol_warn_level, LD_PROTOCOL,
                 "possible truncated or corrupt LZMA data.");
          goto err;
        }

        // Grow the output buffer.  Keep the write position as an offset,
        // since tor_realloc may move *out.
        offset = stream.next_out - (unsigned char *)*out;
        old_size = out_size;
        out_size *= 2;

        if (out_size < old_size) {
          log_warn(LD_GENERAL, "Size overflow in LZMA uncompression.");
          goto err;
        }

        if (tor_compress_is_compression_bomb(in_len, out_size)) {
          log_warn(LD_GENERAL, "Input looks like a possible LZMA compression "
                   "bomb. Not proceeding.");
          goto err;
        }

        if (out_size >= SIZE_T_CEILING) {
          log_warn(LD_BUG, "Hit SIZE_T_CEILING limit while uncompressing "
                   "LZMA data.");
          goto err;
        }

        *out = tor_realloc(*out, out_size);
        stream.next_out = (unsigned char *)(*out + offset);

        if (out_size - offset > UINT_MAX) {
          log_warn(LD_BUG, "Ran over unsigned int limit of LZMA while "
                   "uncompressing.");
          goto err;
        }

        stream.avail_out = (unsigned int)(out_size - offset);
        break;

      // We list all the possible values of `lzma_ret` here to silence the
      // `switch-enum` warning and to detect if a new member was added.
      case LZMA_NO_CHECK:
      case LZMA_UNSUPPORTED_CHECK:
      case LZMA_GET_CHECK:
      case LZMA_MEM_ERROR:
      case LZMA_MEMLIMIT_ERROR:
      case LZMA_FORMAT_ERROR:
      case LZMA_OPTIONS_ERROR:
      case LZMA_DATA_ERROR:
      case LZMA_PROG_ERROR:
      default:
        log_warn(LD_GENERAL, "LZMA decompression didn't finish: %s.",
                 lzma_error_str(retval));
        goto err;
    }
  }

 done:
  *out_len = stream.next_out - (unsigned char*)*out;
  lzma_end(&stream);

  // NUL-terminate our output.
  if (out_size == *out_len)
    *out = tor_realloc(*out, out_size + 1);
  (*out)[*out_len] = '\0';

  return 0;

 err:
  lzma_end(&stream);
  tor_free(*out);
  return -1;
#else // HAVE_LZMA.
  (void)out;
  (void)out_len;
  (void)in;
  (void)in_len;
  (void)method;
  (void)complete_only;
  (void)protocol_warn_level;

  return -1;
#endif // HAVE_LZMA.
}
/** Internal LZMA state for incremental compression/decompression. /** Internal LZMA state for incremental compression/decompression.
* The body of this struct is not exposed. */ * The body of this struct is not exposed. */
struct tor_lzma_compress_state_t { struct tor_lzma_compress_state_t {

View File

@ -17,16 +17,6 @@ const char *tor_lzma_get_version_str(void);
const char *tor_lzma_get_header_version_str(void); const char *tor_lzma_get_header_version_str(void);
int tor_lzma_compress(char **out, size_t *out_len,
const char *in, size_t in_len,
compress_method_t method);
int tor_lzma_uncompress(char **out, size_t *out_len,
const char *in, size_t in_len,
compress_method_t method,
int complete_only,
int protocol_warn_level);
/** Internal state for an incremental LZMA compression/decompression. */ /** Internal state for an incremental LZMA compression/decompression. */
typedef struct tor_lzma_compress_state_t tor_lzma_compress_state_t; typedef struct tor_lzma_compress_state_t tor_lzma_compress_state_t;

View File

@ -56,6 +56,7 @@ memory_level(compression_level_t level)
{ {
switch (level) { switch (level) {
default: default:
case BEST_COMPRESSION: return 9;
case HIGH_COMPRESSION: return 8; case HIGH_COMPRESSION: return 8;
case MEDIUM_COMPRESSION: return 7; case MEDIUM_COMPRESSION: return 7;
case LOW_COMPRESSION: return 6; case LOW_COMPRESSION: return 6;
@ -70,6 +71,7 @@ method_bits(compress_method_t method, compression_level_t level)
const int flag = method == GZIP_METHOD ? 16 : 0; const int flag = method == GZIP_METHOD ? 16 : 0;
switch (level) { switch (level) {
default: default:
case BEST_COMPRESSION:
case HIGH_COMPRESSION: return flag + 15; case HIGH_COMPRESSION: return flag + 15;
case MEDIUM_COMPRESSION: return flag + 13; case MEDIUM_COMPRESSION: return flag + 13;
case LOW_COMPRESSION: return flag + 11; case LOW_COMPRESSION: return flag + 11;
@ -102,260 +104,6 @@ tor_zlib_get_header_version_str(void)
return ZLIB_VERSION; return ZLIB_VERSION;
} }
/** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
 * allocated buffer, using the method described in <b>method</b>.  Store the
 * compressed string in *<b>out</b>, and its length in *<b>out_len</b>.  Return
 * 0 on success, -1 on failure.
 *
 * <b>in_len</b> must be below UINT_MAX (asserted).  A result that compresses
 * so well that tor_compress_is_compression_bomb() would reject it is treated
 * as a failure.
 */
int
tor_zlib_compress(char **out, size_t *out_len,
                  const char *in, size_t in_len,
                  compress_method_t method)
{
  struct z_stream_s *stream = NULL;
  size_t out_size, old_size;
  off_t offset;

  tor_assert(out);
  tor_assert(out_len);
  tor_assert(in);
  tor_assert(in_len < UINT_MAX);

  /* The error path frees *out, so give it a known value first. */
  *out = NULL;

  stream = tor_malloc_zero(sizeof(struct z_stream_s));
  stream->zalloc = Z_NULL;
  stream->zfree = Z_NULL;
  stream->opaque = NULL;
  stream->next_in = (unsigned char*) in;
  stream->avail_in = (unsigned int)in_len;

  if (deflateInit2(stream, Z_BEST_COMPRESSION, Z_DEFLATED,
                   method_bits(method, HIGH_COMPRESSION),
                   memory_level(HIGH_COMPRESSION),
                   Z_DEFAULT_STRATEGY) != Z_OK) {
    //LCOV_EXCL_START -- we can only provoke failure by giving junk arguments.
    log_warn(LD_GENERAL, "Error from deflateInit2: %s",
             stream->msg?stream->msg:"<no message>");
    goto err;
    //LCOV_EXCL_STOP
  }

  /* Guess 50% compression. */
  out_size = in_len / 2;
  if (out_size < 1024) out_size = 1024;
  *out = tor_malloc(out_size);
  stream->next_out = (unsigned char*)*out;
  stream->avail_out = (unsigned int)out_size;

  while (1) {
    switch (deflate(stream, Z_FINISH))
      {
      case Z_STREAM_END:
        goto done;
      case Z_OK:
        /* In case zlib doesn't work as I think .... */
        if (stream->avail_out >= stream->avail_in+16)
          break;
        /* Otherwise fall through and enlarge the output buffer. */
      case Z_BUF_ERROR:
        /* Keep the write position as an offset: tor_realloc may move
         * *out. */
        offset = stream->next_out - ((unsigned char*)*out);
        old_size = out_size;
        out_size *= 2;
        if (out_size < old_size) {
          log_warn(LD_GENERAL, "Size overflow in compression.");
          goto err;
        }
        *out = tor_realloc(*out, out_size);
        stream->next_out = (unsigned char*)(*out + offset);
        if (out_size - offset > UINT_MAX) {
          log_warn(LD_BUG, "Ran over unsigned int limit of zlib while "
                   "compressing.");
          goto err;
        }
        stream->avail_out = (unsigned int)(out_size - offset);
        break;
      default:
        log_warn(LD_GENERAL, "Gzip compression didn't finish: %s",
                 stream->msg ? stream->msg : "<no message>");
        goto err;
      }
  }
 done:
  *out_len = stream->total_out;
#if defined(OpenBSD)
  /* "Hey Rocky!  Watch me change an unsigned field to a signed field in a
   *    third-party API!"
   * "Oh, that trick will just make people do unsafe casts to the unsigned
   *    type in their cross-platform code!"
   * "Don't be foolish.  I'm _sure_ they'll have the good sense to make sure
   *    the newly unsigned field isn't negative." */
  tor_assert(stream->total_out >= 0);
#endif
  if (deflateEnd(stream)!=Z_OK) {
    // LCOV_EXCL_START -- unreachable if we handled the zlib structure right
    tor_assert_nonfatal_unreached();
    log_warn(LD_BUG, "Error freeing gzip structures");
    goto err;
    // LCOV_EXCL_STOP
  }
  tor_free(stream);

  if (tor_compress_is_compression_bomb(*out_len, in_len)) {
    log_warn(LD_BUG, "We compressed something and got an insanely high "
          "compression factor; other Tors would think this was a zlib bomb.");
    goto err;
  }

  return 0;
 err:
  if (stream) {
    deflateEnd(stream);
    tor_free(stream);
  }
  tor_free(*out);
  return -1;
}
/** Given a Zlib/Gzip compressed string of total length <b>in_len</b> bytes
 * at <b>in</b>, uncompress them into a newly allocated buffer. Store the
 * uncompressed string in *<b>out</b>, and its length in *<b>out_len</b>.
 * Return 0 on success, -1 on failure.
 *
 * The output is NUL-terminated; the terminator is not counted in
 * *<b>out_len</b>. Several compressed streams concatenated in <b>in</b> are
 * uncompressed one after another into the same buffer.
 *
 * If <b>complete_only</b> is true, we consider a truncated input as a failure;
 * otherwise we decompress as much as we can. Warn about truncated or corrupt
 * inputs at <b>protocol_warn_level</b>.
 */
int
tor_zlib_uncompress(char **out, size_t *out_len,
const char *in, size_t in_len,
compress_method_t method,
int complete_only,
int protocol_warn_level)
{
struct z_stream_s *stream = NULL;
size_t out_size, old_size;
off_t offset;
int r;
tor_assert(out);
tor_assert(out_len);
tor_assert(in);
tor_assert(in_len < UINT_MAX);
/* The error path inspects *out, so give it a known value first. */
*out = NULL;
stream = tor_malloc_zero(sizeof(struct z_stream_s));
stream->zalloc = Z_NULL;
stream->zfree = Z_NULL;
stream->opaque = NULL;
stream->next_in = (unsigned char*) in;
stream->avail_in = (unsigned int)in_len;
if (inflateInit2(stream,
method_bits(method, HIGH_COMPRESSION)) != Z_OK) {
// LCOV_EXCL_START -- can only hit this if we give bad inputs.
log_warn(LD_GENERAL, "Error from inflateInit2: %s",
stream->msg?stream->msg:"<no message>");
goto err;
// LCOV_EXCL_STOP
}
out_size = in_len * 2; /* guess 50% compression. */
if (out_size < 1024) out_size = 1024;
if (out_size >= SIZE_T_CEILING || out_size > UINT_MAX)
goto err;
*out = tor_malloc(out_size);
stream->next_out = (unsigned char*)*out;
stream->avail_out = (unsigned int)out_size;
while (1) {
/* Z_FINISH is used when only complete input is acceptable; otherwise
 * Z_SYNC_FLUSH is used. */
switch (inflate(stream, complete_only ? Z_FINISH : Z_SYNC_FLUSH))
{
case Z_STREAM_END:
if (stream->avail_in == 0)
goto done;
/* There may be more compressed data here. */
if ((r = inflateEnd(stream)) != Z_OK) {
log_warn(LD_BUG, "Error freeing gzip structures");
goto err;
}
/* Start a fresh inflate state for the next concatenated stream; the
 * input and output positions in *stream are left as they were. */
if (inflateInit2(stream,
method_bits(method,HIGH_COMPRESSION)) != Z_OK) {
log_warn(LD_GENERAL, "Error from second inflateInit2: %s",
stream->msg?stream->msg:"<no message>");
goto err;
}
break;
case Z_OK:
/* With partial results allowed and no input left, stop here. */
if (!complete_only && stream->avail_in == 0)
goto done;
/* In case zlib doesn't work as I think.... */
if (stream->avail_out >= stream->avail_in+16)
break;
/* Otherwise fall through and enlarge the output buffer. */
case Z_BUF_ERROR:
/* With output space still available, the stall was not caused by a
 * full buffer, so blame the input. */
if (stream->avail_out > 0) {
log_fn(protocol_warn_level, LD_PROTOCOL,
"possible truncated or corrupt zlib data");
goto err;
}
/* Keep the write position as an offset: tor_realloc may move *out. */
offset = stream->next_out - (unsigned char*)*out;
old_size = out_size;
out_size *= 2;
if (out_size < old_size) {
log_warn(LD_GENERAL, "Size overflow in uncompression.");
goto err;
}
if (tor_compress_is_compression_bomb(in_len, out_size)) {
log_warn(LD_GENERAL, "Input looks like a possible zlib bomb; "
"not proceeding.");
goto err;
}
if (out_size >= SIZE_T_CEILING) {
log_warn(LD_BUG, "Hit SIZE_T_CEILING limit while uncompressing.");
goto err;
}
*out = tor_realloc(*out, out_size);
stream->next_out = (unsigned char*)(*out + offset);
if (out_size - offset > UINT_MAX) {
log_warn(LD_BUG, "Ran over unsigned int limit of zlib while "
"uncompressing.");
goto err;
}
stream->avail_out = (unsigned int)(out_size - offset);
break;
default:
log_warn(LD_GENERAL, "Gzip decompression returned an error: %s",
stream->msg ? stream->msg : "<no message>");
goto err;
}
}
done:
*out_len = stream->next_out - (unsigned char*)*out;
r = inflateEnd(stream);
/* NOTE(review): the err path below tests `stream` before using it, so this
 * relies on tor_free() leaving `stream` NULL -- confirm against the
 * tor_free definition. */
tor_free(stream);
if (r != Z_OK) {
log_warn(LD_BUG, "Error freeing gzip structures");
goto err;
}
/* NUL-terminate output. */
if (out_size == *out_len)
*out = tor_realloc(*out, out_size + 1);
(*out)[*out_len] = '\0';
return 0;
err:
if (stream) {
inflateEnd(stream);
tor_free(stream);
}
if (*out) {
tor_free(*out);
}
return -1;
}
/** Internal zlib state for an incremental compression/decompression. /** Internal zlib state for an incremental compression/decompression.
* The body of this struct is not exposed. */ * The body of this struct is not exposed. */
struct tor_zlib_compress_state_t { struct tor_zlib_compress_state_t {
@ -416,7 +164,7 @@ tor_zlib_compress_new(int compress_,
if (! compress_) { if (! compress_) {
/* use this setting for decompression, since we might have the /* use this setting for decompression, since we might have the
* max number of window bits */ * max number of window bits */
compression_level = HIGH_COMPRESSION; compression_level = BEST_COMPRESSION;
} }
out = tor_malloc_zero(sizeof(tor_zlib_compress_state_t)); out = tor_malloc_zero(sizeof(tor_zlib_compress_state_t));
@ -465,8 +213,11 @@ tor_zlib_compress_process(tor_zlib_compress_state_t *state,
{ {
int err; int err;
tor_assert(state != NULL); tor_assert(state != NULL);
tor_assert(*in_len <= UINT_MAX); if (*in_len > UINT_MAX ||
tor_assert(*out_len <= UINT_MAX); *out_len > UINT_MAX) {
return TOR_COMPRESS_ERROR;
}
state->stream.next_in = (unsigned char*) *in; state->stream.next_in = (unsigned char*) *in;
state->stream.avail_in = (unsigned int)*in_len; state->stream.avail_in = (unsigned int)*in_len;
state->stream.next_out = (unsigned char*) *out; state->stream.next_out = (unsigned char*) *out;

View File

@ -17,16 +17,6 @@ const char *tor_zlib_get_version_str(void);
const char *tor_zlib_get_header_version_str(void); const char *tor_zlib_get_header_version_str(void);
int tor_zlib_compress(char **out, size_t *out_len,
const char *in, size_t in_len,
compress_method_t method);
int tor_zlib_uncompress(char **out, size_t *out_len,
const char *in, size_t in_len,
compress_method_t method,
int complete_only,
int protocol_warn_level);
/** Internal state for an incremental zlib/gzip compression/decompression. */ /** Internal state for an incremental zlib/gzip compression/decompression. */
typedef struct tor_zlib_compress_state_t tor_zlib_compress_state_t; typedef struct tor_zlib_compress_state_t tor_zlib_compress_state_t;

View File

@ -33,6 +33,7 @@ memory_level(compression_level_t level)
{ {
switch (level) { switch (level) {
default: default:
case BEST_COMPRESSION:
case HIGH_COMPRESSION: return 9; case HIGH_COMPRESSION: return 9;
case MEDIUM_COMPRESSION: return 8; case MEDIUM_COMPRESSION: return 8;
case LOW_COMPRESSION: return 7; case LOW_COMPRESSION: return 7;
@ -85,289 +86,6 @@ tor_zstd_get_header_version_str(void)
#endif #endif
} }
/** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
 * allocated buffer, using the Zstandard method. Store the compressed string
 * in *<b>out</b>, and its length in *<b>out_len</b>. Return 0 on success, -1
 * on failure.
 *
 * <b>method</b> must be ZSTD_METHOD and <b>in_len</b> must be below UINT_MAX;
 * both are asserted. A result that compresses so well that
 * tor_compress_is_compression_bomb() would reject it is treated as a failure.
 * When built without HAVE_ZSTD this function always returns -1.
 */
int
tor_zstd_compress(char **out, size_t *out_len,
const char *in, size_t in_len,
compress_method_t method)
{
#ifdef HAVE_ZSTD
ZSTD_CStream *stream = NULL;
size_t out_size, old_size;
size_t retval;
tor_assert(out);
tor_assert(out_len);
tor_assert(in);
tor_assert(in_len < UINT_MAX);
tor_assert(method == ZSTD_METHOD);
// The error path frees *out, so give it a known value first.
*out = NULL;
stream = ZSTD_createCStream();
if (stream == NULL) {
// Zstandard does not give us any useful error message to why this
// happened. See https://github.com/facebook/zstd/issues/398
log_warn(LD_GENERAL, "Error while creating Zstandard stream");
goto err;
}
retval = ZSTD_initCStream(stream,
memory_level(HIGH_COMPRESSION));
if (ZSTD_isError(retval)) {
log_warn(LD_GENERAL, "Zstandard stream initialization error: %s",
ZSTD_getErrorName(retval));
goto err;
}
// Assume 50% compression and update our buffer in case we need to.
out_size = in_len / 2;
if (out_size < 1024)
out_size = 1024;
*out = tor_malloc(out_size);
*out_len = 0;
ZSTD_inBuffer input = { in, in_len, 0 };
ZSTD_outBuffer output = { *out, out_size, 0 };
// First phase: feed the input, doubling the output buffer whenever it is
// full while input is still pending.
while (input.pos < input.size) {
retval = ZSTD_compressStream(stream, &output, &input);
if (ZSTD_isError(retval)) {
log_warn(LD_GENERAL, "Zstandard stream compression error: %s",
ZSTD_getErrorName(retval));
goto err;
}
if (input.pos < input.size && output.pos == output.size) {
old_size = out_size;
out_size *= 2;
if (out_size < old_size) {
log_warn(LD_GENERAL, "Size overflow in Zstandard compression.");
goto err;
}
if (out_size - output.pos > UINT_MAX) {
log_warn(LD_BUG, "Ran over unsigned int limit of Zstandard while "
"compressing.");
goto err;
}
// The buffer may have moved; point the output descriptor at it again.
output.dst = *out = tor_realloc(*out, out_size);
output.size = out_size;
}
}
// Second phase: keep calling ZSTD_endStream() until it returns 0, doubling
// the output buffer each time it is full.
while (1) {
retval = ZSTD_endStream(stream, &output);
if (retval == 0)
break;
if (ZSTD_isError(retval)) {
log_warn(LD_GENERAL, "Zstandard stream error: %s",
ZSTD_getErrorName(retval));
goto err;
}
if (output.pos == output.size) {
old_size = out_size;
out_size *= 2;
if (out_size < old_size) {
log_warn(LD_GENERAL, "Size overflow in Zstandard compression.");
goto err;
}
if (out_size - output.pos > UINT_MAX) {
log_warn(LD_BUG, "Ran over unsigned int limit of Zstandard while "
"compressing.");
goto err;
}
// The buffer may have moved; point the output descriptor at it again.
output.dst = *out = tor_realloc(*out, out_size);
output.size = out_size;
}
}
*out_len = output.pos;
if (tor_compress_is_compression_bomb(*out_len, in_len)) {
log_warn(LD_BUG, "We compressed something and got an insanely high "
"compression factor; other Tor instances would think "
"this is a compression bomb.");
goto err;
}
// stream was checked for NULL right after creation, so this test always
// passes here.
if (stream != NULL) {
ZSTD_freeCStream(stream);
}
return 0;
err:
if (stream != NULL) {
ZSTD_freeCStream(stream);
}
tor_free(*out);
return -1;
#else // HAVE_ZSTD.
(void)out;
(void)out_len;
(void)in;
(void)in_len;
(void)method;
return -1;
#endif // HAVE_ZSTD.
}
/** Given a Zstandard compressed string of total length <b>in_len</b> bytes at
 * <b>in</b>, uncompress them into a newly allocated buffer.  Store the
 * uncompressed string in *<b>out</b>, and its length in *<b>out_len</b>.
 * Return 0 on success, -1 on failure.
 *
 * The output is NUL-terminated; the terminator is not counted in
 * *<b>out_len</b>.
 *
 * If <b>complete_only</b> is true, we consider a truncated input as a failure;
 * otherwise we decompress as much as we can.  Warn about truncated or corrupt
 * inputs at <b>protocol_warn_level</b>.  (Neither argument is honoured yet:
 * both are ignored, see the FIXME below.)
 *
 * When built without HAVE_ZSTD this function always returns -1.
 */
int
tor_zstd_uncompress(char **out, size_t *out_len,
                    const char *in, size_t in_len,
                    compress_method_t method,
                    int complete_only,
                    int protocol_warn_level)
{
#ifdef HAVE_ZSTD
  ZSTD_DStream *stream = NULL;
  size_t retval;
  size_t out_size, old_size;
  int more_output = 0;

  tor_assert(out);
  tor_assert(out_len);
  tor_assert(in);
  tor_assert(in_len < UINT_MAX);
  tor_assert(method == ZSTD_METHOD);

  // FIXME(ahf): Handle this?
  (void)complete_only;
  (void)protocol_warn_level;

  // The error path frees *out, so give it a known value first.
  *out = NULL;

  stream = ZSTD_createDStream();

  if (stream == NULL) {
    // Zstandard does not give us any useful error message to why this
    // happened. See https://github.com/facebook/zstd/issues/398
    log_warn(LD_GENERAL, "Error while creating Zstandard stream");
    goto err;
  }

  retval = ZSTD_initDStream(stream);

  if (ZSTD_isError(retval)) {
    log_warn(LD_GENERAL, "Zstandard stream initialization error: %s",
             ZSTD_getErrorName(retval));
    goto err;
  }

  // Guess that the data expands by a factor of two, with a floor of 1024
  // bytes.
  out_size = in_len * 2;
  if (out_size < 1024)
    out_size = 1024;

  if (out_size >= SIZE_T_CEILING || out_size > UINT_MAX)
    goto err;

  *out = tor_malloc(out_size);
  *out_len = 0;

  ZSTD_inBuffer input = { in, in_len, 0 };
  ZSTD_outBuffer output = { *out, out_size, 0 };

  // Keep going while input is pending, and also while the decoder may still
  // hold decoded bytes that did not fit into the output buffer.
  while (input.pos < input.size || more_output) {
    retval = ZSTD_decompressStream(stream, &output, &input);

    if (ZSTD_isError(retval)) {
      log_warn(LD_GENERAL, "Zstandard stream decompression error: %s",
               ZSTD_getErrorName(retval));
      goto err;
    }

    // A nonzero return value together with a completely full output buffer
    // means there may be more data to flush, even if all of the input has
    // been consumed already.
    more_output = (retval != 0 && output.pos == output.size);

    if (output.pos == output.size &&
        (input.pos < input.size || more_output)) {
      old_size = out_size;
      out_size *= 2;

      if (out_size < old_size) {
        log_warn(LD_GENERAL, "Size overflow in Zstandard uncompression.");
        goto err;
      }

      if (tor_compress_is_compression_bomb(in_len, out_size)) {
        log_warn(LD_GENERAL, "Input looks like a possible Zstandard "
                 "compression bomb. Not proceeding.");
        goto err;
      }

      if (out_size >= SIZE_T_CEILING) {
        log_warn(LD_BUG, "Hit SIZE_T_CEILING limit while uncompressing "
                 "Zstandard data.");
        goto err;
      }

      if (out_size - output.pos > UINT_MAX) {
        log_warn(LD_BUG, "Ran over unsigned int limit of Zstandard while "
                 "decompressing.");
        goto err;
      }

      // The buffer may have moved; point the output descriptor at it again.
      output.dst = *out = tor_realloc(*out, out_size);
      output.size = out_size;
    }
  }

  *out_len = output.pos;

  if (stream != NULL) {
    ZSTD_freeDStream(stream);
  }

  // NUL-terminate our output.
  if (out_size == *out_len)
    *out = tor_realloc(*out, out_size + 1);
  (*out)[*out_len] = '\0';

  return 0;

 err:
  if (stream != NULL) {
    ZSTD_freeDStream(stream);
  }

  tor_free(*out);
  return -1;
#else // HAVE_ZSTD.
  (void)out;
  (void)out_len;
  (void)in;
  (void)in_len;
  (void)method;
  (void)complete_only;
  (void)protocol_warn_level;

  return -1;
#endif // HAVE_ZSTD.
}
/** Internal Zstandard state for incremental compression/decompression. /** Internal Zstandard state for incremental compression/decompression.
* The body of this struct is not exposed. */ * The body of this struct is not exposed. */
struct tor_zstd_compress_state_t { struct tor_zstd_compress_state_t {

View File

@ -17,16 +17,6 @@ const char *tor_zstd_get_version_str(void);
const char *tor_zstd_get_header_version_str(void); const char *tor_zstd_get_header_version_str(void);
int tor_zstd_compress(char **out, size_t *out_len,
const char *in, size_t in_len,
compress_method_t method);
int tor_zstd_uncompress(char **out, size_t *out_len,
const char *in, size_t in_len,
compress_method_t method,
int complete_only,
int protocol_warn_level);
/** Internal state for an incremental Zstandard compression/decompression. */ /** Internal state for an incremental Zstandard compression/decompression. */
typedef struct tor_zstd_compress_state_t tor_zstd_compress_state_t; typedef struct tor_zstd_compress_state_t tor_zstd_compress_state_t;

View File

@ -2377,7 +2377,7 @@ test_util_gzip_compression_bomb(void *arg)
expect_single_log_msg_containing( expect_single_log_msg_containing(
"We compressed something and got an insanely high " "We compressed something and got an insanely high "
"compression factor; other Tors would think this " "compression factor; other Tors would think this "
"was a zlib bomb."); "was a compression bomb.");
teardown_capture_of_logs(); teardown_capture_of_logs();
/* Here's a compression bomb that we made manually. */ /* Here's a compression bomb that we made manually. */