Pre-compress consensus diffs with supported consensus methods.

This commit is contained in:
Nick Mathewson 2017-04-25 15:36:13 -04:00
parent a1172b6774
commit 1e1581a24e
3 changed files with 176 additions and 66 deletions

View File

@ -74,6 +74,25 @@ typedef enum cdm_diff_status_t {
CDM_DIFF_ERROR=3, CDM_DIFF_ERROR=3,
} cdm_diff_status_t; } cdm_diff_status_t;
/** Which methods do we use for precompressing diffs? */
static const compress_method_t compress_diffs_with[] = {
NO_METHOD,
GZIP_METHOD,
#ifdef HAVE_LZMA
LZMA_METHOD,
#endif
#ifdef HAVE_ZSTD
ZSTD_METHOD,
#endif
};
/** How many different methods will we try to use for diff compression? */
STATIC unsigned
n_diff_compression_methods(void)
{
return ARRAY_LENGTH(compress_diffs_with);
}
/** Hashtable node used to remember the current status of the diff /** Hashtable node used to remember the current status of the diff
* from a given sha3 digest to the current consensus. */ * from a given sha3 digest to the current consensus. */
typedef struct cdm_diff_t { typedef struct cdm_diff_t {
@ -84,12 +103,15 @@ typedef struct cdm_diff_t {
/** SHA3-256 digest of the consensus that this diff is _from_. (part of the /** SHA3-256 digest of the consensus that this diff is _from_. (part of the
* ht key) */ * ht key) */
uint8_t from_sha3[DIGEST256_LEN]; uint8_t from_sha3[DIGEST256_LEN];
/** Method by which the diff is compressed. (part of the ht key */
compress_method_t compress_method;
/** One of the CDM_DIFF_* values, depending on whether this diff /** One of the CDM_DIFF_* values, depending on whether this diff
* is available, in progress, or impossible to compute. */ * is available, in progress, or impossible to compute. */
cdm_diff_status_t cdm_diff_status; cdm_diff_status_t cdm_diff_status;
/** SHA3-256 digest of the consensus that this diff is _to. */ /** SHA3-256 digest of the consensus that this diff is _to. */
uint8_t target_sha3[DIGEST256_LEN]; uint8_t target_sha3[DIGEST256_LEN];
/** Handle to the cache entry for this diff, if any. We use a handle here /** Handle to the cache entry for this diff, if any. We use a handle here
* to avoid thinking too hard about cache entry lifetime issues. */ * to avoid thinking too hard about cache entry lifetime issues. */
consensus_cache_entry_handle_t *entry; consensus_cache_entry_handle_t *entry;
@ -121,9 +143,10 @@ static void consdiffmgr_set_cache_flags(void);
static unsigned static unsigned
cdm_diff_hash(const cdm_diff_t *diff) cdm_diff_hash(const cdm_diff_t *diff)
{ {
uint8_t tmp[DIGEST256_LEN + 1]; uint8_t tmp[DIGEST256_LEN + 2];
memcpy(tmp, diff->from_sha3, DIGEST256_LEN); memcpy(tmp, diff->from_sha3, DIGEST256_LEN);
tmp[DIGEST256_LEN] = (uint8_t) diff->flavor; tmp[DIGEST256_LEN] = (uint8_t) diff->flavor;
tmp[DIGEST256_LEN+1] = (uint8_t) diff->compress_method;
return (unsigned) siphash24g(tmp, sizeof(tmp)); return (unsigned) siphash24g(tmp, sizeof(tmp));
} }
/** Helper: compare two cdm_diff_t objects for key equality */ /** Helper: compare two cdm_diff_t objects for key equality */
@ -131,7 +154,8 @@ static int
cdm_diff_eq(const cdm_diff_t *diff1, const cdm_diff_t *diff2) cdm_diff_eq(const cdm_diff_t *diff1, const cdm_diff_t *diff2)
{ {
return fast_memeq(diff1->from_sha3, diff2->from_sha3, DIGEST256_LEN) && return fast_memeq(diff1->from_sha3, diff2->from_sha3, DIGEST256_LEN) &&
diff1->flavor == diff2->flavor; diff1->flavor == diff2->flavor &&
diff1->compress_method == diff2->compress_method;
} }
HT_PROTOTYPE(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq) HT_PROTOTYPE(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq)
@ -153,13 +177,15 @@ cdm_diff_free(cdm_diff_t *diff)
static cdm_diff_t * static cdm_diff_t *
cdm_diff_new(consensus_flavor_t flav, cdm_diff_new(consensus_flavor_t flav,
const uint8_t *from_sha3, const uint8_t *from_sha3,
const uint8_t *target_sha3) const uint8_t *target_sha3,
compress_method_t method)
{ {
cdm_diff_t *ent; cdm_diff_t *ent;
ent = tor_malloc_zero(sizeof(cdm_diff_t)); ent = tor_malloc_zero(sizeof(cdm_diff_t));
ent->flavor = flav; ent->flavor = flav;
memcpy(ent->from_sha3, from_sha3, DIGEST256_LEN); memcpy(ent->from_sha3, from_sha3, DIGEST256_LEN);
memcpy(ent->target_sha3, target_sha3, DIGEST256_LEN); memcpy(ent->target_sha3, target_sha3, DIGEST256_LEN);
ent->compress_method = method;
return ent; return ent;
} }
@ -177,18 +203,25 @@ cdm_diff_ht_check_and_note_pending(consensus_flavor_t flav,
const uint8_t *target_sha3) const uint8_t *target_sha3)
{ {
struct cdm_diff_t search, *ent; struct cdm_diff_t search, *ent;
unsigned u;
int result = 0;
for (u = 0; u < n_diff_compression_methods(); ++u) {
compress_method_t method = compress_diffs_with[u];
memset(&search, 0, sizeof(cdm_diff_t)); memset(&search, 0, sizeof(cdm_diff_t));
search.flavor = flav; search.flavor = flav;
search.compress_method = method;
memcpy(search.from_sha3, from_sha3, DIGEST256_LEN); memcpy(search.from_sha3, from_sha3, DIGEST256_LEN);
ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search);
if (ent) { if (ent) {
tor_assert_nonfatal(ent->cdm_diff_status != CDM_DIFF_PRESENT); tor_assert_nonfatal(ent->cdm_diff_status != CDM_DIFF_PRESENT);
return 1; result = 1;
continue;
} }
ent = cdm_diff_new(flav, from_sha3, target_sha3); ent = cdm_diff_new(flav, from_sha3, target_sha3, method);
ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS; ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS;
HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent); HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent);
return 0; }
return result;
} }
/** /**
@ -201,16 +234,18 @@ static void
cdm_diff_ht_set_status(consensus_flavor_t flav, cdm_diff_ht_set_status(consensus_flavor_t flav,
const uint8_t *from_sha3, const uint8_t *from_sha3,
const uint8_t *to_sha3, const uint8_t *to_sha3,
compress_method_t method,
int status, int status,
consensus_cache_entry_handle_t *handle) consensus_cache_entry_handle_t *handle)
{ {
struct cdm_diff_t search, *ent; struct cdm_diff_t search, *ent;
memset(&search, 0, sizeof(cdm_diff_t)); memset(&search, 0, sizeof(cdm_diff_t));
search.flavor = flav; search.flavor = flav;
search.compress_method = method,
memcpy(search.from_sha3, from_sha3, DIGEST256_LEN); memcpy(search.from_sha3, from_sha3, DIGEST256_LEN);
ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search);
if (!ent) { if (!ent) {
ent = cdm_diff_new(flav, from_sha3, to_sha3); ent = cdm_diff_new(flav, from_sha3, to_sha3, method);
ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS; ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS;
HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent); HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent);
} else if (fast_memneq(ent->target_sha3, to_sha3, DIGEST256_LEN)) { } else if (fast_memneq(ent->target_sha3, to_sha3, DIGEST256_LEN)) {
@ -500,7 +535,8 @@ consdiffmgr_find_diff_from(consensus_cache_entry_t **entry_out,
consensus_flavor_t flavor, consensus_flavor_t flavor,
int digest_type, int digest_type,
const uint8_t *digest, const uint8_t *digest,
size_t digestlen) size_t digestlen,
compress_method_t method)
{ {
if (BUG(digest_type != DIGEST_SHA3_256) || if (BUG(digest_type != DIGEST_SHA3_256) ||
BUG(digestlen != DIGEST256_LEN)) { BUG(digestlen != DIGEST256_LEN)) {
@ -511,6 +547,7 @@ consdiffmgr_find_diff_from(consensus_cache_entry_t **entry_out,
cdm_diff_t search, *ent; cdm_diff_t search, *ent;
memset(&search, 0, sizeof(search)); memset(&search, 0, sizeof(search));
search.flavor = flavor; search.flavor = flavor;
search.compress_method = method;
memcpy(search.from_sha3, digest, DIGEST256_LEN); memcpy(search.from_sha3, digest, DIGEST256_LEN);
ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search);
@ -820,6 +857,16 @@ consdiffmgr_diffs_load(void)
int flavor = networkstatus_parse_flavor_name(lv_flavor); int flavor = networkstatus_parse_flavor_name(lv_flavor);
if (flavor < 0) if (flavor < 0)
continue; continue;
const char *lv_compression =
consensus_cache_entry_get_value(diff, LABEL_COMPRESSION_TYPE);
compress_method_t method = NO_METHOD;
if (lv_compression) {
method = compression_method_get_by_name(lv_compression);
if (method == UNKNOWN_METHOD) {
continue;
}
}
uint8_t from_sha3[DIGEST256_LEN]; uint8_t from_sha3[DIGEST256_LEN];
uint8_t to_sha3[DIGEST256_LEN]; uint8_t to_sha3[DIGEST256_LEN];
if (cdm_entry_get_sha3_value(from_sha3, diff, LABEL_FROM_SHA3_DIGEST)<0) if (cdm_entry_get_sha3_value(from_sha3, diff, LABEL_FROM_SHA3_DIGEST)<0)
@ -828,6 +875,7 @@ consdiffmgr_diffs_load(void)
continue; continue;
cdm_diff_ht_set_status(flavor, from_sha3, to_sha3, cdm_diff_ht_set_status(flavor, from_sha3, to_sha3,
method,
CDM_DIFF_PRESENT, CDM_DIFF_PRESENT,
consensus_cache_entry_handle_new(diff)); consensus_cache_entry_handle_new(diff));
} SMARTLIST_FOREACH_END(diff); } SMARTLIST_FOREACH_END(diff);
@ -896,6 +944,18 @@ consdiffmgr_free_all(void)
Thread workers Thread workers
=====*/ =====*/
typedef struct compressed_result_t {
config_line_t *labels;
/**
* Output: Body of the diff, as compressed.
*/
uint8_t *body;
/**
* Output: length of body_out
*/
size_t bodylen;
} compressed_result_t;
/** /**
* An object passed to a worker thread that will try to produce a consensus * An object passed to a worker thread that will try to produce a consensus
* diff. * diff.
@ -914,18 +974,8 @@ typedef struct consensus_diff_worker_job_t {
*/ */
consensus_cache_entry_t *diff_to; consensus_cache_entry_t *diff_to;
/** /** Output: labels and bodies */
* Output: Labels to store in the cache associated with this diff. compressed_result_t out[ARRAY_LENGTH(compress_diffs_with)];
*/
config_line_t *labels_out;
/**
* Output: Body of the diff
*/
uint8_t *body_out;
/**
* Output: length of body_out
*/
size_t bodylen_out;
} consensus_diff_worker_job_t; } consensus_diff_worker_job_t;
/** Given a consensus_cache_entry_t, check whether it has a label claiming /** Given a consensus_cache_entry_t, check whether it has a label claiming
@ -1040,23 +1090,55 @@ consensus_diff_worker_threadfn(void *state_, void *work_)
return WQ_RPL_REPLY; return WQ_RPL_REPLY;
} }
/* Send the reply */ /* Compress the results and send the reply */
job->body_out = (uint8_t *) consensus_diff; tor_assert(compress_diffs_with[0] == NO_METHOD);
job->bodylen_out = strlen(consensus_diff); size_t difflen = strlen(consensus_diff);
job->out[0].body = (uint8_t *) consensus_diff;
job->out[0].bodylen = difflen;
cdm_labels_prepend_sha3(&job->labels_out, LABEL_SHA3_DIGEST, config_line_t *common_labels = NULL;
job->body_out, job->bodylen_out); cdm_labels_prepend_sha3(&common_labels,
cdm_labels_prepend_sha3(&job->labels_out, LABEL_SHA3_DIGEST_UNCOMPRESSED, LABEL_SHA3_DIGEST_UNCOMPRESSED,
job->body_out, job->bodylen_out); job->out[0].body,
config_line_prepend(&job->labels_out, LABEL_FROM_VALID_AFTER, job->out[0].bodylen);
config_line_prepend(&common_labels, LABEL_FROM_VALID_AFTER,
lv_from_valid_after); lv_from_valid_after);
config_line_prepend(&job->labels_out, LABEL_VALID_AFTER, lv_to_valid_after); config_line_prepend(&common_labels, LABEL_VALID_AFTER,
config_line_prepend(&job->labels_out, LABEL_FLAVOR, lv_from_flavor); lv_to_valid_after);
config_line_prepend(&job->labels_out, LABEL_FROM_SHA3_DIGEST, config_line_prepend(&common_labels, LABEL_FLAVOR, lv_from_flavor);
config_line_prepend(&common_labels, LABEL_FROM_SHA3_DIGEST,
lv_from_digest); lv_from_digest);
config_line_prepend(&job->labels_out, LABEL_TARGET_SHA3_DIGEST, config_line_prepend(&common_labels, LABEL_TARGET_SHA3_DIGEST,
lv_to_digest); lv_to_digest);
config_line_prepend(&job->labels_out, LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); config_line_prepend(&common_labels, LABEL_DOCTYPE,
DOCTYPE_CONSENSUS_DIFF);
job->out[0].labels = config_lines_dup(common_labels);
cdm_labels_prepend_sha3(&job->out[0].labels,
LABEL_SHA3_DIGEST,
job->out[0].body,
job->out[0].bodylen);
unsigned u;
for (u = 1; u < n_diff_compression_methods(); ++u) {
compress_method_t method = compress_diffs_with[u];
const char *methodname = compression_method_get_name(method);
char *result;
size_t sz;
if (0 == tor_compress(&result, &sz, consensus_diff, difflen, method)) {
job->out[u].body = (uint8_t*)result;
job->out[u].bodylen = sz;
job->out[u].labels = config_lines_dup(common_labels);
cdm_labels_prepend_sha3(&job->out[u].labels, LABEL_SHA3_DIGEST,
job->out[u].body,
job->out[u].bodylen);
config_line_prepend(&job->out[u].labels,
LABEL_COMPRESSION_TYPE,
methodname);
}
}
config_free_lines(common_labels);
return WQ_RPL_REPLY; return WQ_RPL_REPLY;
} }
@ -1068,8 +1150,11 @@ consensus_diff_worker_job_free(consensus_diff_worker_job_t *job)
{ {
if (!job) if (!job)
return; return;
tor_free(job->body_out); unsigned u;
config_free_lines(job->labels_out); for (u = 0; u < n_diff_compression_methods(); ++u) {
config_free_lines(job->out[u].labels);
tor_free(job->out[u].body);
}
consensus_cache_entry_decref(job->diff_from); consensus_cache_entry_decref(job->diff_from);
consensus_cache_entry_decref(job->diff_to); consensus_cache_entry_decref(job->diff_to);
tor_free(job); tor_free(job);
@ -1117,20 +1202,35 @@ consensus_diff_worker_replyfn(void *work_)
cache = 0; cache = 0;
} }
int status; int status = CDM_DIFF_ERROR;
consensus_cache_entry_handle_t *handle = NULL; consensus_cache_entry_handle_t *handles[ARRAY_LENGTH(compress_diffs_with)];
if (job->body_out && job->bodylen_out && job->labels_out) { memset(handles, 0, sizeof(handles));
unsigned u;
for (u = 0; u < n_diff_compression_methods(); ++u) {
compress_method_t method = compress_diffs_with[u];
uint8_t *body_out = job->out[u].body;
size_t bodylen_out = job->out[u].bodylen;
config_line_t *labels = job->out[u].labels;
const char *methodname = compression_method_get_name(method);
if (body_out && bodylen_out && labels) {
/* Success! Store the results */ /* Success! Store the results */
log_info(LD_DIRSERV, "Adding consensus diff from %s to %s", log_info(LD_DIRSERV, "Adding consensus diff from %s to %s, "
lv_from_digest, lv_to_digest); "compressed with %s",
lv_from_digest, lv_to_digest, methodname);
consensus_cache_entry_t *ent = consensus_cache_entry_t *ent =
consensus_cache_add(cdm_cache_get(), job->labels_out, consensus_cache_add(cdm_cache_get(),
job->body_out, labels,
job->bodylen_out); body_out,
bodylen_out);
status = CDM_DIFF_PRESENT; status = CDM_DIFF_PRESENT;
handle = consensus_cache_entry_handle_new(ent); handles[u] = consensus_cache_entry_handle_new(ent);
consensus_cache_entry_decref(ent); consensus_cache_entry_decref(ent);
} else { }
}
if (status != CDM_DIFF_PRESENT) {
/* Failure! Nothing to do but complain */ /* Failure! Nothing to do but complain */
log_warn(LD_DIRSERV, log_warn(LD_DIRSERV,
"Worker was unable to compute consensus diff " "Worker was unable to compute consensus diff "
@ -1139,10 +1239,15 @@ consensus_diff_worker_replyfn(void *work_)
status = CDM_DIFF_ERROR; status = CDM_DIFF_ERROR;
} }
if (cache) for (u = 0; u < ARRAY_LENGTH(handles); ++u) {
cdm_diff_ht_set_status(flav, from_sha3, to_sha3, status, handle); compress_method_t method = compress_diffs_with[u];
else if (cache) {
consensus_cache_entry_handle_free(handle); cdm_diff_ht_set_status(flav, from_sha3, to_sha3, method, status,
handles[u]);
} else {
consensus_cache_entry_handle_free(handles[u]);
}
}
consensus_diff_worker_job_free(job); consensus_diff_worker_job_free(job);
} }

View File

@ -28,7 +28,8 @@ consdiff_status_t consdiffmgr_find_diff_from(
consensus_flavor_t flavor, consensus_flavor_t flavor,
int digest_type, int digest_type,
const uint8_t *digest, const uint8_t *digest,
size_t digestlen); size_t digestlen,
compress_method_t method);
void consdiffmgr_rescan(void); void consdiffmgr_rescan(void);
int consdiffmgr_cleanup(void); int consdiffmgr_cleanup(void);
void consdiffmgr_configure(const consdiff_cfg_t *cfg); void consdiffmgr_configure(const consdiff_cfg_t *cfg);
@ -36,6 +37,7 @@ void consdiffmgr_free_all(void);
int consdiffmgr_validate(void); int consdiffmgr_validate(void);
#ifdef CONSDIFFMGR_PRIVATE #ifdef CONSDIFFMGR_PRIVATE
STATIC unsigned n_diff_compression_methods(void);
STATIC consensus_cache_t *cdm_cache_get(void); STATIC consensus_cache_t *cdm_cache_get(void);
STATIC consensus_cache_entry_t *cdm_cache_lookup_consensus( STATIC consensus_cache_entry_t *cdm_cache_lookup_consensus(
consensus_flavor_t flavor, time_t valid_after); consensus_flavor_t flavor, time_t valid_after);

View File

@ -141,7 +141,8 @@ lookup_diff_from(consensus_cache_entry_t **out,
uint8_t digest[DIGEST256_LEN]; uint8_t digest[DIGEST256_LEN];
crypto_digest256((char*)digest, str1, strlen(str1), DIGEST_SHA3_256); crypto_digest256((char*)digest, str1, strlen(str1), DIGEST_SHA3_256);
return consdiffmgr_find_diff_from(out, flav, return consdiffmgr_find_diff_from(out, flav,
DIGEST_SHA3_256, digest, sizeof(digest)); DIGEST_SHA3_256, digest, sizeof(digest),
NO_METHOD);
} }
static int static int
@ -373,7 +374,8 @@ test_consdiffmgr_make_diffs(void *arg)
tt_int_op(1, OP_EQ, smartlist_len(fake_cpuworker_queue)); tt_int_op(1, OP_EQ, smartlist_len(fake_cpuworker_queue));
diff_status = consdiffmgr_find_diff_from(&diff, FLAV_MICRODESC, diff_status = consdiffmgr_find_diff_from(&diff, FLAV_MICRODESC,
DIGEST_SHA3_256, DIGEST_SHA3_256,
md_ns_sha3, DIGEST256_LEN); md_ns_sha3, DIGEST256_LEN,
NO_METHOD);
tt_int_op(CONSDIFF_IN_PROGRESS, OP_EQ, diff_status); tt_int_op(CONSDIFF_IN_PROGRESS, OP_EQ, diff_status);
// Now run that process and get the diff. // Now run that process and get the diff.
@ -384,7 +386,8 @@ test_consdiffmgr_make_diffs(void *arg)
// At this point we should be able to get that diff. // At this point we should be able to get that diff.
diff_status = consdiffmgr_find_diff_from(&diff, FLAV_MICRODESC, diff_status = consdiffmgr_find_diff_from(&diff, FLAV_MICRODESC,
DIGEST_SHA3_256, DIGEST_SHA3_256,
md_ns_sha3, DIGEST256_LEN); md_ns_sha3, DIGEST256_LEN,
NO_METHOD);
tt_int_op(CONSDIFF_AVAILABLE, OP_EQ, diff_status); tt_int_op(CONSDIFF_AVAILABLE, OP_EQ, diff_status);
tt_assert(diff); tt_assert(diff);
@ -757,7 +760,7 @@ test_consdiffmgr_cleanup_old_diffs(void *arg)
/* Now add an even-more-recent consensus; this should make all previous /* Now add an even-more-recent consensus; this should make all previous
* diffs deletable */ * diffs deletable */
tt_int_op(0, OP_EQ, consdiffmgr_add_consensus(md_body[3], md_ns[3])); tt_int_op(0, OP_EQ, consdiffmgr_add_consensus(md_body[3], md_ns[3]));
tt_int_op(2, OP_EQ, consdiffmgr_cleanup()); tt_int_op(2 * n_diff_compression_methods(), OP_EQ, consdiffmgr_cleanup());
tt_int_op(CONSDIFF_NOT_FOUND, OP_EQ, tt_int_op(CONSDIFF_NOT_FOUND, OP_EQ,
lookup_diff_from(&ent, FLAV_MICRODESC, md_body[0])); lookup_diff_from(&ent, FLAV_MICRODESC, md_body[0]));