diff --git a/src/core/include.am b/src/core/include.am index aa8e29ad3f..45f4cb5c4e 100644 --- a/src/core/include.am +++ b/src/core/include.am @@ -74,6 +74,7 @@ LIBTOR_APP_A_SOURCES = \ src/feature/dircommon/voting_schedule.c \ src/feature/dirparse/parsecommon.c \ src/feature/dirparse/routerparse.c \ + src/feature/dirparse/unparseable.c \ src/feature/hibernate/hibernate.c \ src/feature/hs/hs_cache.c \ src/feature/hs/hs_cell.c \ @@ -287,6 +288,7 @@ noinst_HEADERS += \ src/feature/dircommon/voting_schedule.h \ src/feature/dirparse/parsecommon.h \ src/feature/dirparse/routerparse.h \ + src/feature/dirparse/unparseable.h \ src/feature/hibernate/hibernate.h \ src/feature/hs/hs_cache.h \ src/feature/hs/hs_cell.h \ diff --git a/src/feature/dirparse/routerparse.c b/src/feature/dirparse/routerparse.c index ea5ac6f000..83890cdb97 100644 --- a/src/feature/dirparse/routerparse.c +++ b/src/feature/dirparse/routerparse.c @@ -82,6 +82,7 @@ #include "lib/crypt_ops/crypto_util.h" #include "lib/memarea/memarea.h" #include "lib/sandbox/sandbox.h" +#include "feature/dirparse/unparseable.h" #include "feature/dirauth/dirvote.h" @@ -107,9 +108,9 @@ #undef log #include -#ifdef HAVE_SYS_STAT_H -#include -#endif +//#ifdef HAVE_SYS_STAT_H +//#include +//#endif /****************************************************************************/ @@ -423,581 +424,6 @@ static int check_signature_token(const char *digest, #define DUMP_AREA(a,name) STMT_NIL #endif /* defined(DEBUG_AREA_ALLOC) */ -/* Dump mechanism for unparseable descriptors */ - -/** List of dumped descriptors for FIFO cleanup purposes */ -STATIC smartlist_t *descs_dumped = NULL; -/** Total size of dumped descriptors for FIFO cleanup */ -STATIC uint64_t len_descs_dumped = 0; -/** Directory to stash dumps in */ -static int have_dump_desc_dir = 0; -static int problem_with_dump_desc_dir = 0; - -#define DESC_DUMP_DATADIR_SUBDIR "unparseable-descs" -#define DESC_DUMP_BASE_FILENAME "unparseable-desc" - -/** Find the dump directory and check 
if we'll be able to create it */ -static void -dump_desc_init(void) -{ - char *dump_desc_dir; - - dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR); - - /* - * We just check for it, don't create it at this point; we'll - * create it when we need it if it isn't already there. - */ - if (check_private_dir(dump_desc_dir, CPD_CHECK, get_options()->User) < 0) { - /* Error, log and flag it as having a problem */ - log_notice(LD_DIR, - "Doesn't look like we'll be able to create descriptor dump " - "directory %s; dumps will be disabled.", - dump_desc_dir); - problem_with_dump_desc_dir = 1; - tor_free(dump_desc_dir); - return; - } - - /* Check if it exists */ - switch (file_status(dump_desc_dir)) { - case FN_DIR: - /* We already have a directory */ - have_dump_desc_dir = 1; - break; - case FN_NOENT: - /* Nothing, we'll need to create it later */ - have_dump_desc_dir = 0; - break; - case FN_ERROR: - /* Log and flag having a problem */ - log_notice(LD_DIR, - "Couldn't check whether descriptor dump directory %s already" - " exists: %s", - dump_desc_dir, strerror(errno)); - problem_with_dump_desc_dir = 1; - break; - case FN_FILE: - case FN_EMPTY: - default: - /* Something else was here! */ - log_notice(LD_DIR, - "Descriptor dump directory %s already exists and isn't a " - "directory", - dump_desc_dir); - problem_with_dump_desc_dir = 1; - } - - if (have_dump_desc_dir && !problem_with_dump_desc_dir) { - dump_desc_populate_fifo_from_directory(dump_desc_dir); - } - - tor_free(dump_desc_dir); -} - -/** Create the dump directory if needed and possible */ -static void -dump_desc_create_dir(void) -{ - char *dump_desc_dir; - - /* If the problem flag is set, skip it */ - if (problem_with_dump_desc_dir) return; - - /* Do we need it? 
*/ - if (!have_dump_desc_dir) { - dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR); - - if (check_private_dir(dump_desc_dir, CPD_CREATE, - get_options()->User) < 0) { - log_notice(LD_DIR, - "Failed to create descriptor dump directory %s", - dump_desc_dir); - problem_with_dump_desc_dir = 1; - } - - /* Okay, we created it */ - have_dump_desc_dir = 1; - - tor_free(dump_desc_dir); - } -} - -/** Dump desc FIFO/cleanup; take ownership of the given filename, add it to - * the FIFO, and clean up the oldest entries to the extent they exceed the - * configured cap. If any old entries with a matching hash existed, they - * just got overwritten right before this was called and we should adjust - * the total size counter without deleting them. - */ -static void -dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256, - size_t len) -{ - dumped_desc_t *ent = NULL, *tmp; - uint64_t max_len; - - tor_assert(filename != NULL); - tor_assert(digest_sha256 != NULL); - - if (descs_dumped == NULL) { - /* We better have no length, then */ - tor_assert(len_descs_dumped == 0); - /* Make a smartlist */ - descs_dumped = smartlist_new(); - } - - /* Make a new entry to put this one in */ - ent = tor_malloc_zero(sizeof(*ent)); - ent->filename = filename; - ent->len = len; - ent->when = time(NULL); - memcpy(ent->digest_sha256, digest_sha256, DIGEST256_LEN); - - /* Do we need to do some cleanup? */ - max_len = get_options()->MaxUnparseableDescSizeToLog; - /* Iterate over the list until we've freed enough space */ - while (len > max_len - len_descs_dumped && - smartlist_len(descs_dumped) > 0) { - /* Get the oldest thing on the list */ - tmp = (dumped_desc_t *)(smartlist_get(descs_dumped, 0)); - - /* - * Check if it matches the filename we just added, so we don't delete - * something we just emitted if we get repeated identical descriptors. 
- */ - if (strcmp(tmp->filename, filename) != 0) { - /* Delete it and adjust the length counter */ - tor_unlink(tmp->filename); - tor_assert(len_descs_dumped >= tmp->len); - len_descs_dumped -= tmp->len; - log_info(LD_DIR, - "Deleting old unparseable descriptor dump %s due to " - "space limits", - tmp->filename); - } else { - /* - * Don't delete, but do adjust the counter since we will bump it - * later - */ - tor_assert(len_descs_dumped >= tmp->len); - len_descs_dumped -= tmp->len; - log_info(LD_DIR, - "Replacing old descriptor dump %s with new identical one", - tmp->filename); - } - - /* Free it and remove it from the list */ - smartlist_del_keeporder(descs_dumped, 0); - tor_free(tmp->filename); - tor_free(tmp); - } - - /* Append our entry to the end of the list and bump the counter */ - smartlist_add(descs_dumped, ent); - len_descs_dumped += len; -} - -/** Check if we already have a descriptor for this hash and move it to the - * head of the queue if so. Return 1 if one existed and 0 otherwise. - */ -static int -dump_desc_fifo_bump_hash(const uint8_t *digest_sha256) -{ - dumped_desc_t *match = NULL; - - tor_assert(digest_sha256); - - if (descs_dumped) { - /* Find a match if one exists */ - SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) { - if (ent && - tor_memeq(ent->digest_sha256, digest_sha256, DIGEST256_LEN)) { - /* - * Save a pointer to the match and remove it from its current - * position. 
- */ - match = ent; - SMARTLIST_DEL_CURRENT_KEEPORDER(descs_dumped, ent); - break; - } - } SMARTLIST_FOREACH_END(ent); - - if (match) { - /* Update the timestamp */ - match->when = time(NULL); - /* Add it back at the end of the list */ - smartlist_add(descs_dumped, match); - - /* Indicate we found one */ - return 1; - } - } - - return 0; -} - -/** Clean up on exit; just memory, leave the dumps behind - */ -STATIC void -dump_desc_fifo_cleanup(void) -{ - if (descs_dumped) { - /* Free each descriptor */ - SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) { - tor_assert(ent); - tor_free(ent->filename); - tor_free(ent); - } SMARTLIST_FOREACH_END(ent); - /* Free the list */ - smartlist_free(descs_dumped); - descs_dumped = NULL; - len_descs_dumped = 0; - } -} - -/** Handle one file for dump_desc_populate_fifo_from_directory(); make sure - * the filename is sensibly formed and matches the file content, and either - * return a dumped_desc_t for it or remove the file and return NULL. - */ -MOCK_IMPL(STATIC dumped_desc_t *, -dump_desc_populate_one_file, (const char *dirname, const char *f)) -{ - dumped_desc_t *ent = NULL; - char *path = NULL, *desc = NULL; - const char *digest_str; - char digest[DIGEST256_LEN], content_digest[DIGEST256_LEN]; - /* Expected prefix before digest in filenames */ - const char *f_pfx = DESC_DUMP_BASE_FILENAME "."; - /* - * Stat while reading; this is important in case the file - * contains a NUL character. - */ - struct stat st; - - /* Sanity-check args */ - tor_assert(dirname != NULL); - tor_assert(f != NULL); - - /* Form the full path */ - tor_asprintf(&path, "%s" PATH_SEPARATOR "%s", dirname, f); - - /* Check that f has the form DESC_DUMP_BASE_FILENAME. */ - - if (!strcmpstart(f, f_pfx)) { - /* It matches the form, but is the digest parseable as such? 
*/ - digest_str = f + strlen(f_pfx); - if (base16_decode(digest, DIGEST256_LEN, - digest_str, strlen(digest_str)) != DIGEST256_LEN) { - /* We failed to decode it */ - digest_str = NULL; - } - } else { - /* No match */ - digest_str = NULL; - } - - if (!digest_str) { - /* We couldn't get a sensible digest */ - log_notice(LD_DIR, - "Removing unrecognized filename %s from unparseable " - "descriptors directory", f); - tor_unlink(path); - /* We're done */ - goto done; - } - - /* - * The filename has the form DESC_DUMP_BASE_FILENAME "." and - * we've decoded the digest. Next, check that we can read it and the - * content matches this digest. We are relying on the fact that if the - * file contains a '\0', read_file_to_str() will allocate space for and - * read the entire file and return the correct size in st. - */ - desc = read_file_to_str(path, RFTS_IGNORE_MISSING|RFTS_BIN, &st); - if (!desc) { - /* We couldn't read it */ - log_notice(LD_DIR, - "Failed to read %s from unparseable descriptors directory; " - "attempting to remove it.", f); - tor_unlink(path); - /* We're done */ - goto done; - } - -#if SIZE_MAX > UINT64_MAX - if (BUG((uint64_t)st.st_size > (uint64_t)SIZE_MAX)) { - /* LCOV_EXCL_START - * Should be impossible since RFTS above should have failed to read the - * huge file into RAM. */ - goto done; - /* LCOV_EXCL_STOP */ - } -#endif /* SIZE_MAX > UINT64_MAX */ - if (BUG(st.st_size < 0)) { - /* LCOV_EXCL_START - * Should be impossible, since the OS isn't supposed to be b0rken. */ - goto done; - /* LCOV_EXCL_STOP */ - } - /* (Now we can be sure that st.st_size is safe to cast to a size_t.) */ - - /* - * We got one; now compute its digest and check that it matches the - * filename. 
- */ - if (crypto_digest256((char *)content_digest, desc, (size_t) st.st_size, - DIGEST_SHA256) < 0) { - /* Weird, but okay */ - log_info(LD_DIR, - "Unable to hash content of %s from unparseable descriptors " - "directory", f); - tor_unlink(path); - /* We're done */ - goto done; - } - - /* Compare the digests */ - if (tor_memneq(digest, content_digest, DIGEST256_LEN)) { - /* No match */ - log_info(LD_DIR, - "Hash of %s from unparseable descriptors directory didn't " - "match its filename; removing it", f); - tor_unlink(path); - /* We're done */ - goto done; - } - - /* Okay, it's a match, we should prepare ent */ - ent = tor_malloc_zero(sizeof(dumped_desc_t)); - ent->filename = path; - memcpy(ent->digest_sha256, digest, DIGEST256_LEN); - ent->len = (size_t) st.st_size; - ent->when = st.st_mtime; - /* Null out path so we don't free it out from under ent */ - path = NULL; - - done: - /* Free allocations if we had them */ - tor_free(desc); - tor_free(path); - - return ent; -} - -/** Sort helper for dump_desc_populate_fifo_from_directory(); compares - * the when field of dumped_desc_ts in a smartlist to put the FIFO in - * the correct order after reconstructing it from the directory. 
- */ -static int -dump_desc_compare_fifo_entries(const void **a_v, const void **b_v) -{ - const dumped_desc_t **a = (const dumped_desc_t **)a_v; - const dumped_desc_t **b = (const dumped_desc_t **)b_v; - - if ((a != NULL) && (*a != NULL)) { - if ((b != NULL) && (*b != NULL)) { - /* We have sensible dumped_desc_ts to compare */ - if ((*a)->when < (*b)->when) { - return -1; - } else if ((*a)->when == (*b)->when) { - return 0; - } else { - return 1; - } - } else { - /* - * We shouldn't see this, but what the hell, NULLs precede everythin - * else - */ - return 1; - } - } else { - return -1; - } -} - -/** Scan the contents of the directory, and update FIFO/counters; this will - * consistency-check descriptor dump filenames against hashes of descriptor - * dump file content, and remove any inconsistent/unreadable dumps, and then - * reconstruct the dump FIFO as closely as possible for the last time the - * tor process shut down. If a previous dump was repeated more than once and - * moved ahead in the FIFO, the mtime will not have been updated and the - * reconstructed order will be wrong, but will always be a permutation of - * the original. - */ -STATIC void -dump_desc_populate_fifo_from_directory(const char *dirname) -{ - smartlist_t *files = NULL; - dumped_desc_t *ent = NULL; - - tor_assert(dirname != NULL); - - /* Get a list of files */ - files = tor_listdir(dirname); - if (!files) { - log_notice(LD_DIR, - "Unable to get contents of unparseable descriptor dump " - "directory %s", - dirname); - return; - } - - /* - * Iterate through the list and decide which files should go in the - * FIFO and which should be purged. - */ - - SMARTLIST_FOREACH_BEGIN(files, char *, f) { - /* Try to get a FIFO entry */ - ent = dump_desc_populate_one_file(dirname, f); - if (ent) { - /* - * We got one; add it to the FIFO. No need for duplicate checking - * here since we just verified the name and digest match. 
- */ - - /* Make sure we have a list to add it to */ - if (!descs_dumped) { - descs_dumped = smartlist_new(); - len_descs_dumped = 0; - } - - /* Add it and adjust the counter */ - smartlist_add(descs_dumped, ent); - len_descs_dumped += ent->len; - } - /* - * If we didn't, we will have unlinked the file if necessary and - * possible, and emitted a log message about it, so just go on to - * the next. - */ - } SMARTLIST_FOREACH_END(f); - - /* Did we get anything? */ - if (descs_dumped != NULL) { - /* Sort the FIFO in order of increasing timestamp */ - smartlist_sort(descs_dumped, dump_desc_compare_fifo_entries); - - /* Log some stats */ - log_info(LD_DIR, - "Reloaded unparseable descriptor dump FIFO with %d dump(s) " - "totaling %"PRIu64 " bytes", - smartlist_len(descs_dumped), (len_descs_dumped)); - } - - /* Free the original list */ - SMARTLIST_FOREACH(files, char *, f, tor_free(f)); - smartlist_free(files); -} - -/** For debugging purposes, dump unparseable descriptor *desc of - * type *type to file $DATADIR/unparseable-desc. Do not write more - * than one descriptor to disk per minute. If there is already such a - * file in the data directory, overwrite it. 
*/ -MOCK_IMPL(STATIC void, -dump_desc,(const char *desc, const char *type)) -{ - tor_assert(desc); - tor_assert(type); - size_t len; - /* The SHA256 of the string */ - uint8_t digest_sha256[DIGEST256_LEN]; - char digest_sha256_hex[HEX_DIGEST256_LEN+1]; - /* Filename to log it to */ - char *debugfile, *debugfile_base; - - /* Get the hash for logging purposes anyway */ - len = strlen(desc); - if (crypto_digest256((char *)digest_sha256, desc, len, - DIGEST_SHA256) < 0) { - log_info(LD_DIR, - "Unable to parse descriptor of type %s, and unable to even hash" - " it!", type); - goto err; - } - - base16_encode(digest_sha256_hex, sizeof(digest_sha256_hex), - (const char *)digest_sha256, sizeof(digest_sha256)); - - /* - * We mention type and hash in the main log; don't clutter up the files - * with anything but the exact dump. - */ - tor_asprintf(&debugfile_base, - DESC_DUMP_BASE_FILENAME ".%s", digest_sha256_hex); - debugfile = get_datadir_fname2(DESC_DUMP_DATADIR_SUBDIR, debugfile_base); - - /* - * Check if the sandbox is active or will become active; see comment - * below at the log message for why. - */ - if (!(sandbox_is_active() || get_options()->Sandbox)) { - if (len <= get_options()->MaxUnparseableDescSizeToLog) { - if (!dump_desc_fifo_bump_hash(digest_sha256)) { - /* Create the directory if needed */ - dump_desc_create_dir(); - /* Make sure we've got it */ - if (have_dump_desc_dir && !problem_with_dump_desc_dir) { - /* Write it, and tell the main log about it */ - write_str_to_file(debugfile, desc, 1); - log_info(LD_DIR, - "Unable to parse descriptor of type %s with hash %s and " - "length %lu. 
See file %s in data directory for details.", - type, digest_sha256_hex, (unsigned long)len, - debugfile_base); - dump_desc_fifo_add_and_clean(debugfile, digest_sha256, len); - /* Since we handed ownership over, don't free debugfile later */ - debugfile = NULL; - } else { - /* Problem with the subdirectory */ - log_info(LD_DIR, - "Unable to parse descriptor of type %s with hash %s and " - "length %lu. Descriptor not dumped because we had a " - "problem creating the " DESC_DUMP_DATADIR_SUBDIR - " subdirectory", - type, digest_sha256_hex, (unsigned long)len); - /* We do have to free debugfile in this case */ - } - } else { - /* We already had one with this hash dumped */ - log_info(LD_DIR, - "Unable to parse descriptor of type %s with hash %s and " - "length %lu. Descriptor not dumped because one with that " - "hash has already been dumped.", - type, digest_sha256_hex, (unsigned long)len); - /* We do have to free debugfile in this case */ - } - } else { - /* Just log that it happened without dumping */ - log_info(LD_DIR, - "Unable to parse descriptor of type %s with hash %s and " - "length %lu. Descriptor not dumped because it exceeds maximum" - " log size all by itself.", - type, digest_sha256_hex, (unsigned long)len); - /* We do have to free debugfile in this case */ - } - } else { - /* - * Not logging because the sandbox is active and seccomp2 apparently - * doesn't have a sensible way to allow filenames according to a pattern - * match. (If we ever figure out how to say "allow writes to /regex/", - * remove this checK). - */ - log_info(LD_DIR, - "Unable to parse descriptor of type %s with hash %s and " - "length %lu. Descriptor not dumped because the sandbox is " - "configured", - type, digest_sha256_hex, (unsigned long)len); - } - - tor_free(debugfile_base); - tor_free(debugfile); - - err: - return; -} - /** Set digest to the SHA-1 digest of the hash of the directory in * s. Return 0 on success, -1 on failure. 
*/ diff --git a/src/feature/dirparse/routerparse.h b/src/feature/dirparse/routerparse.h index 87c2a75aa5..51d39c6175 100644 --- a/src/feature/dirparse/routerparse.h +++ b/src/feature/dirparse/routerparse.h @@ -114,28 +114,10 @@ void routerparse_init(void); void routerparse_free_all(void); #ifdef ROUTERPARSE_PRIVATE -/* - * One entry in the list of dumped descriptors; filename dumped to, length, - * SHA-256 and timestamp. - */ - -typedef struct { - char *filename; - size_t len; - uint8_t digest_sha256[DIGEST256_LEN]; - time_t when; -} dumped_desc_t; - -EXTERN(uint64_t, len_descs_dumped) -EXTERN(smartlist_t *, descs_dumped) STATIC int routerstatus_parse_guardfraction(const char *guardfraction_str, networkstatus_t *vote, vote_routerstatus_t *vote_rs, routerstatus_t *rs); -MOCK_DECL(STATIC dumped_desc_t *, dump_desc_populate_one_file, - (const char *dirname, const char *f)); -STATIC void dump_desc_populate_fifo_from_directory(const char *dirname); -STATIC void dump_desc_fifo_cleanup(void); struct memarea_t; STATIC routerstatus_t *routerstatus_parse_entry_from_string( struct memarea_t *area, @@ -144,7 +126,6 @@ STATIC routerstatus_t *routerstatus_parse_entry_from_string( vote_routerstatus_t *vote_rs, int consensus_method, consensus_flavor_t flav); -MOCK_DECL(STATIC void,dump_desc,(const char *desc, const char *type)); MOCK_DECL(STATIC int, router_compute_hash_final,(char *digest, const char *start, size_t len, digest_algorithm_t alg)); diff --git a/src/feature/dirparse/unparseable.c b/src/feature/dirparse/unparseable.c new file mode 100644 index 0000000000..80e38d0703 --- /dev/null +++ b/src/feature/dirparse/unparseable.c @@ -0,0 +1,591 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. 
*/ +/* See LICENSE for licensing information */ + +#define UNPARSEABLE_PRIVATE + +#include "core/or/or.h" +#include "app/config/config.h" +#include "feature/dirparse/unparseable.h" +#include "lib/sandbox/sandbox.h" + +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + +/* Dump mechanism for unparseable descriptors */ + +/** List of dumped descriptors for FIFO cleanup purposes */ +STATIC smartlist_t *descs_dumped = NULL; +/** Total size of dumped descriptors for FIFO cleanup */ +STATIC uint64_t len_descs_dumped = 0; +/** Directory to stash dumps in */ +static int have_dump_desc_dir = 0; +static int problem_with_dump_desc_dir = 0; + +#define DESC_DUMP_DATADIR_SUBDIR "unparseable-descs" +#define DESC_DUMP_BASE_FILENAME "unparseable-desc" + +/** Find the dump directory and check if we'll be able to create it */ +void +dump_desc_init(void) +{ + char *dump_desc_dir; + + dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR); + + /* + * We just check for it, don't create it at this point; we'll + * create it when we need it if it isn't already there. + */ + if (check_private_dir(dump_desc_dir, CPD_CHECK, get_options()->User) < 0) { + /* Error, log and flag it as having a problem */ + log_notice(LD_DIR, + "Doesn't look like we'll be able to create descriptor dump " + "directory %s; dumps will be disabled.", + dump_desc_dir); + problem_with_dump_desc_dir = 1; + tor_free(dump_desc_dir); + return; + } + + /* Check if it exists */ + switch (file_status(dump_desc_dir)) { + case FN_DIR: + /* We already have a directory */ + have_dump_desc_dir = 1; + break; + case FN_NOENT: + /* Nothing, we'll need to create it later */ + have_dump_desc_dir = 0; + break; + case FN_ERROR: + /* Log and flag having a problem */ + log_notice(LD_DIR, + "Couldn't check whether descriptor dump directory %s already" + " exists: %s", + dump_desc_dir, strerror(errno)); + problem_with_dump_desc_dir = 1; + break; + case FN_FILE: + case FN_EMPTY: + default: + /* Something else was here! 
*/ + log_notice(LD_DIR, + "Descriptor dump directory %s already exists and isn't a " + "directory", + dump_desc_dir); + problem_with_dump_desc_dir = 1; + } + + if (have_dump_desc_dir && !problem_with_dump_desc_dir) { + dump_desc_populate_fifo_from_directory(dump_desc_dir); + } + + tor_free(dump_desc_dir); +} + +/** Create the dump directory if needed and possible */ +static void +dump_desc_create_dir(void) +{ + char *dump_desc_dir; + + /* If the problem flag is set, skip it */ + if (problem_with_dump_desc_dir) return; + + /* Do we need it? */ + if (!have_dump_desc_dir) { + dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR); + + if (check_private_dir(dump_desc_dir, CPD_CREATE, + get_options()->User) < 0) { + log_notice(LD_DIR, + "Failed to create descriptor dump directory %s", + dump_desc_dir); + problem_with_dump_desc_dir = 1; + } + + /* Okay, we created it */ + have_dump_desc_dir = 1; + + tor_free(dump_desc_dir); + } +} + +/** Dump desc FIFO/cleanup; take ownership of the given filename, add it to + * the FIFO, and clean up the oldest entries to the extent they exceed the + * configured cap. If any old entries with a matching hash existed, they + * just got overwritten right before this was called and we should adjust + * the total size counter without deleting them. + */ +static void +dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256, + size_t len) +{ + dumped_desc_t *ent = NULL, *tmp; + uint64_t max_len; + + tor_assert(filename != NULL); + tor_assert(digest_sha256 != NULL); + + if (descs_dumped == NULL) { + /* We better have no length, then */ + tor_assert(len_descs_dumped == 0); + /* Make a smartlist */ + descs_dumped = smartlist_new(); + } + + /* Make a new entry to put this one in */ + ent = tor_malloc_zero(sizeof(*ent)); + ent->filename = filename; + ent->len = len; + ent->when = time(NULL); + memcpy(ent->digest_sha256, digest_sha256, DIGEST256_LEN); + + /* Do we need to do some cleanup? 
*/ + max_len = get_options()->MaxUnparseableDescSizeToLog; + /* Iterate over the list until we've freed enough space */ + while (len > max_len - len_descs_dumped && + smartlist_len(descs_dumped) > 0) { + /* Get the oldest thing on the list */ + tmp = (dumped_desc_t *)(smartlist_get(descs_dumped, 0)); + + /* + * Check if it matches the filename we just added, so we don't delete + * something we just emitted if we get repeated identical descriptors. + */ + if (strcmp(tmp->filename, filename) != 0) { + /* Delete it and adjust the length counter */ + tor_unlink(tmp->filename); + tor_assert(len_descs_dumped >= tmp->len); + len_descs_dumped -= tmp->len; + log_info(LD_DIR, + "Deleting old unparseable descriptor dump %s due to " + "space limits", + tmp->filename); + } else { + /* + * Don't delete, but do adjust the counter since we will bump it + * later + */ + tor_assert(len_descs_dumped >= tmp->len); + len_descs_dumped -= tmp->len; + log_info(LD_DIR, + "Replacing old descriptor dump %s with new identical one", + tmp->filename); + } + + /* Free it and remove it from the list */ + smartlist_del_keeporder(descs_dumped, 0); + tor_free(tmp->filename); + tor_free(tmp); + } + + /* Append our entry to the end of the list and bump the counter */ + smartlist_add(descs_dumped, ent); + len_descs_dumped += len; +} + +/** Check if we already have a descriptor for this hash and move it to the + * end of the list if so. Return 1 if one existed and 0 otherwise. + */ +static int +dump_desc_fifo_bump_hash(const uint8_t *digest_sha256) +{ + dumped_desc_t *match = NULL; + + tor_assert(digest_sha256); + + if (descs_dumped) { + /* Find a match if one exists */ + SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) { + if (ent && + tor_memeq(ent->digest_sha256, digest_sha256, DIGEST256_LEN)) { + /* + * Save a pointer to the match and remove it from its current + * position. 
+ */ + match = ent; + SMARTLIST_DEL_CURRENT_KEEPORDER(descs_dumped, ent); + break; + } + } SMARTLIST_FOREACH_END(ent); + + if (match) { + /* Update the timestamp */ + match->when = time(NULL); + /* Add it back at the end of the list */ + smartlist_add(descs_dumped, match); + + /* Indicate we found one */ + return 1; + } + } + + return 0; +} + +/** Clean up on exit; just memory, leave the dumps behind + */ +void +dump_desc_fifo_cleanup(void) +{ + if (descs_dumped) { + /* Free each descriptor */ + SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) { + tor_assert(ent); + tor_free(ent->filename); + tor_free(ent); + } SMARTLIST_FOREACH_END(ent); + /* Free the list */ + smartlist_free(descs_dumped); + descs_dumped = NULL; + len_descs_dumped = 0; + } +} + +/** Handle one file for dump_desc_populate_fifo_from_directory(); make sure + * the filename is sensibly formed and matches the file content, and either + * return a dumped_desc_t for it or remove the file and return NULL. + */ +MOCK_IMPL(STATIC dumped_desc_t *, +dump_desc_populate_one_file, (const char *dirname, const char *f)) +{ + dumped_desc_t *ent = NULL; + char *path = NULL, *desc = NULL; + const char *digest_str; + char digest[DIGEST256_LEN], content_digest[DIGEST256_LEN]; + /* Expected prefix before digest in filenames */ + const char *f_pfx = DESC_DUMP_BASE_FILENAME "."; + /* + * Stat while reading; this is important in case the file + * contains a NUL character. + */ + struct stat st; + + /* Sanity-check args */ + tor_assert(dirname != NULL); + tor_assert(f != NULL); + + /* Form the full path */ + tor_asprintf(&path, "%s" PATH_SEPARATOR "%s", dirname, f); + + /* Check that f has the form DESC_DUMP_BASE_FILENAME.<digest256> */ + + if (!strcmpstart(f, f_pfx)) { + /* It matches the form, but is the digest parseable as such? 
*/ + digest_str = f + strlen(f_pfx); + if (base16_decode(digest, DIGEST256_LEN, + digest_str, strlen(digest_str)) != DIGEST256_LEN) { + /* We failed to decode it */ + digest_str = NULL; + } + } else { + /* No match */ + digest_str = NULL; + } + + if (!digest_str) { + /* We couldn't get a sensible digest */ + log_notice(LD_DIR, + "Removing unrecognized filename %s from unparseable " + "descriptors directory", f); + tor_unlink(path); + /* We're done */ + goto done; + } + + /* + * The filename has the form DESC_DUMP_BASE_FILENAME "." <digest256> and + * we've decoded the digest. Next, check that we can read it and the + * content matches this digest. We are relying on the fact that if the + * file contains a '\0', read_file_to_str() will allocate space for and + * read the entire file and return the correct size in st. + */ + desc = read_file_to_str(path, RFTS_IGNORE_MISSING|RFTS_BIN, &st); + if (!desc) { + /* We couldn't read it */ + log_notice(LD_DIR, + "Failed to read %s from unparseable descriptors directory; " + "attempting to remove it.", f); + tor_unlink(path); + /* We're done */ + goto done; + } + +#if SIZE_MAX > UINT64_MAX + if (BUG((uint64_t)st.st_size > (uint64_t)SIZE_MAX)) { + /* LCOV_EXCL_START + * Should be impossible since RFTS above should have failed to read the + * huge file into RAM. */ + goto done; + /* LCOV_EXCL_STOP */ + } +#endif /* SIZE_MAX > UINT64_MAX */ + if (BUG(st.st_size < 0)) { + /* LCOV_EXCL_START + * Should be impossible, since the OS isn't supposed to be b0rken. */ + goto done; + /* LCOV_EXCL_STOP */ + } + /* (Now we can be sure that st.st_size is safe to cast to a size_t.) */ + + /* + * We got one; now compute its digest and check that it matches the + * filename. 
+ */ + if (crypto_digest256((char *)content_digest, desc, (size_t) st.st_size, + DIGEST_SHA256) < 0) { + /* Weird, but okay */ + log_info(LD_DIR, + "Unable to hash content of %s from unparseable descriptors " + "directory", f); + tor_unlink(path); + /* We're done */ + goto done; + } + + /* Compare the digests */ + if (tor_memneq(digest, content_digest, DIGEST256_LEN)) { + /* No match */ + log_info(LD_DIR, + "Hash of %s from unparseable descriptors directory didn't " + "match its filename; removing it", f); + tor_unlink(path); + /* We're done */ + goto done; + } + + /* Okay, it's a match, we should prepare ent */ + ent = tor_malloc_zero(sizeof(dumped_desc_t)); + ent->filename = path; + memcpy(ent->digest_sha256, digest, DIGEST256_LEN); + ent->len = (size_t) st.st_size; + ent->when = st.st_mtime; + /* Null out path so we don't free it out from under ent */ + path = NULL; + + done: + /* Free allocations if we had them */ + tor_free(desc); + tor_free(path); + + return ent; +} + +/** Sort helper for dump_desc_populate_fifo_from_directory(); compares + * the when field of dumped_desc_ts in a smartlist to put the FIFO in + * the correct order after reconstructing it from the directory. 
+ */ +static int +dump_desc_compare_fifo_entries(const void **a_v, const void **b_v) +{ + const dumped_desc_t **a = (const dumped_desc_t **)a_v; + const dumped_desc_t **b = (const dumped_desc_t **)b_v; + + if ((a != NULL) && (*a != NULL)) { + if ((b != NULL) && (*b != NULL)) { + /* We have sensible dumped_desc_ts to compare */ + if ((*a)->when < (*b)->when) { + return -1; + } else if ((*a)->when == (*b)->when) { + return 0; + } else { + return 1; + } + } else { + /* + * We shouldn't see this, but what the hell, NULLs precede everything + * else + */ + return 1; + } + } else { + return -1; + } +} + +/** Scan the contents of the directory, and update FIFO/counters; this will + * consistency-check descriptor dump filenames against hashes of descriptor + * dump file content, and remove any inconsistent/unreadable dumps, and then + * reconstruct the dump FIFO as closely as possible for the last time the + * tor process shut down. If a previous dump was repeated more than once and + * moved ahead in the FIFO, the mtime will not have been updated and the + * reconstructed order will be wrong, but will always be a permutation of + * the original. + */ +STATIC void +dump_desc_populate_fifo_from_directory(const char *dirname) +{ + smartlist_t *files = NULL; + dumped_desc_t *ent = NULL; + + tor_assert(dirname != NULL); + + /* Get a list of files */ + files = tor_listdir(dirname); + if (!files) { + log_notice(LD_DIR, + "Unable to get contents of unparseable descriptor dump " + "directory %s", + dirname); + return; + } + + /* + * Iterate through the list and decide which files should go in the + * FIFO and which should be purged. + */ + + SMARTLIST_FOREACH_BEGIN(files, char *, f) { + /* Try to get a FIFO entry */ + ent = dump_desc_populate_one_file(dirname, f); + if (ent) { + /* + * We got one; add it to the FIFO. No need for duplicate checking + * here since we just verified the name and digest match. 
+ */ + + /* Make sure we have a list to add it to */ + if (!descs_dumped) { + descs_dumped = smartlist_new(); + len_descs_dumped = 0; + } + + /* Add it and adjust the counter */ + smartlist_add(descs_dumped, ent); + len_descs_dumped += ent->len; + } + /* + * If we didn't, we will have unlinked the file if necessary and + * possible, and emitted a log message about it, so just go on to + * the next. + */ + } SMARTLIST_FOREACH_END(f); + + /* Did we get anything? */ + if (descs_dumped != NULL) { + /* Sort the FIFO in order of increasing timestamp */ + smartlist_sort(descs_dumped, dump_desc_compare_fifo_entries); + + /* Log some stats */ + log_info(LD_DIR, + "Reloaded unparseable descriptor dump FIFO with %d dump(s) " + "totaling %"PRIu64 " bytes", + smartlist_len(descs_dumped), (len_descs_dumped)); + } + + /* Free the original list */ + SMARTLIST_FOREACH(files, char *, f, tor_free(f)); + smartlist_free(files); +} + +/** For debugging purposes, dump unparseable descriptor *desc of + * type *type to file $DATADIR/unparseable-desc. Do not write more + * than one descriptor to disk per minute. If there is already such a + * file in the data directory, overwrite it. */ +MOCK_IMPL(void, +dump_desc,(const char *desc, const char *type)) +{ + tor_assert(desc); + tor_assert(type); + size_t len; + /* The SHA256 of the string */ + uint8_t digest_sha256[DIGEST256_LEN]; + char digest_sha256_hex[HEX_DIGEST256_LEN+1]; + /* Filename to log it to */ + char *debugfile, *debugfile_base; + + /* Get the hash for logging purposes anyway */ + len = strlen(desc); + if (crypto_digest256((char *)digest_sha256, desc, len, + DIGEST_SHA256) < 0) { + log_info(LD_DIR, + "Unable to parse descriptor of type %s, and unable to even hash" + " it!", type); + goto err; + } + + base16_encode(digest_sha256_hex, sizeof(digest_sha256_hex), + (const char *)digest_sha256, sizeof(digest_sha256)); + + /* + * We mention type and hash in the main log; don't clutter up the files + * with anything but the exact dump. 
+ */ + tor_asprintf(&debugfile_base, + DESC_DUMP_BASE_FILENAME ".%s", digest_sha256_hex); + debugfile = get_datadir_fname2(DESC_DUMP_DATADIR_SUBDIR, debugfile_base); + + /* + * Check if the sandbox is active or will become active; see comment + * below at the log message for why. + */ + if (!(sandbox_is_active() || get_options()->Sandbox)) { + if (len <= get_options()->MaxUnparseableDescSizeToLog) { + if (!dump_desc_fifo_bump_hash(digest_sha256)) { + /* Create the directory if needed */ + dump_desc_create_dir(); + /* Make sure we've got it */ + if (have_dump_desc_dir && !problem_with_dump_desc_dir) { + /* Write it, and tell the main log about it */ + write_str_to_file(debugfile, desc, 1); + log_info(LD_DIR, + "Unable to parse descriptor of type %s with hash %s and " + "length %lu. See file %s in data directory for details.", + type, digest_sha256_hex, (unsigned long)len, + debugfile_base); + dump_desc_fifo_add_and_clean(debugfile, digest_sha256, len); + /* Since we handed ownership over, don't free debugfile later */ + debugfile = NULL; + } else { + /* Problem with the subdirectory */ + log_info(LD_DIR, + "Unable to parse descriptor of type %s with hash %s and " + "length %lu. Descriptor not dumped because we had a " + "problem creating the " DESC_DUMP_DATADIR_SUBDIR + " subdirectory", + type, digest_sha256_hex, (unsigned long)len); + /* We do have to free debugfile in this case */ + } + } else { + /* We already had one with this hash dumped */ + log_info(LD_DIR, + "Unable to parse descriptor of type %s with hash %s and " + "length %lu. Descriptor not dumped because one with that " + "hash has already been dumped.", + type, digest_sha256_hex, (unsigned long)len); + /* We do have to free debugfile in this case */ + } + } else { + /* Just log that it happened without dumping */ + log_info(LD_DIR, + "Unable to parse descriptor of type %s with hash %s and " + "length %lu. 
Descriptor not dumped because it exceeds maximum" + " log size all by itself.", + type, digest_sha256_hex, (unsigned long)len); + /* We do have to free debugfile in this case */ + } + } else { + /* + * Not logging because the sandbox is active and seccomp2 apparently + * doesn't have a sensible way to allow filenames according to a pattern + * match. (If we ever figure out how to say "allow writes to /regex/", + * remove this checK). + */ + log_info(LD_DIR, + "Unable to parse descriptor of type %s with hash %s and " + "length %lu. Descriptor not dumped because the sandbox is " + "configured", + type, digest_sha256_hex, (unsigned long)len); + } + + tor_free(debugfile_base); + tor_free(debugfile); + + err: + return; +} diff --git a/src/feature/dirparse/unparseable.h b/src/feature/dirparse/unparseable.h new file mode 100644 index 0000000000..831ab67777 --- /dev/null +++ b/src/feature/dirparse/unparseable.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file unparseable.h + * \brief Header file for unparseable.c. + **/ + +#ifndef TOR_UNPARSEABLE_H +#define TOR_UNPARSEABLE_H + +#include "lib/cc/torint.h" + +MOCK_DECL(void,dump_desc,(const char *desc, const char *type)); +void dump_desc_fifo_cleanup(void); +void dump_desc_init(void); + +#ifdef UNPARSEABLE_PRIVATE + +/* + * One entry in the list of dumped descriptors; filename dumped to, length, + * SHA-256 and timestamp. 
+ */ + +typedef struct { + char *filename; /* file the descriptor was dumped to */ + size_t len; /* size of the dump in bytes */ + uint8_t digest_sha256[DIGEST256_LEN]; /* SHA-256 digest of the dumped content */ + time_t when; /* timestamp the FIFO is sorted by; taken from the file mtime when an entry is reloaded from disk */ +} dumped_desc_t; +struct smartlist_t; + +EXTERN(uint64_t, len_descs_dumped) /* running byte total of the entries in the FIFO */ +EXTERN(struct smartlist_t *, descs_dumped) /* the dump FIFO itself; NULL until an entry is added */ + +MOCK_DECL(STATIC dumped_desc_t *, dump_desc_populate_one_file, + (const char *dirname, const char *f)); +STATIC void dump_desc_populate_fifo_from_directory(const char *dirname); +#endif /* defined(UNPARSEABLE_PRIVATE) */ + +#endif /* !defined(TOR_UNPARSEABLE_H) */ diff --git a/src/test/fuzz/fuzz_consensus.c b/src/test/fuzz/fuzz_consensus.c index c368b58d04..1b3f019865 100644 --- a/src/test/fuzz/fuzz_consensus.c +++ b/src/test/fuzz/fuzz_consensus.c @@ -3,6 +3,7 @@ #define ROUTERPARSE_PRIVATE #include "core/or/or.h" #include "feature/dirparse/routerparse.h" +#include "feature/dirparse/unparseable.h" #include "feature/nodelist/networkstatus.h" #include "lib/crypt_ops/crypto_ed25519.h" #include "feature/nodelist/networkstatus_st.h" diff --git a/src/test/fuzz/fuzz_descriptor.c b/src/test/fuzz/fuzz_descriptor.c index f7d4a7911a..8087e16391 100644 --- a/src/test/fuzz/fuzz_descriptor.c +++ b/src/test/fuzz/fuzz_descriptor.c @@ -3,6 +3,7 @@ #define ROUTERPARSE_PRIVATE #include "core/or/or.h" #include "feature/dirparse/routerparse.h" +#include "feature/dirparse/unparseable.h" #include "feature/nodelist/routerlist.h" #include "feature/nodelist/torcert.h" #include "feature/keymgt/loadkey.h" diff --git a/src/test/fuzz/fuzz_extrainfo.c b/src/test/fuzz/fuzz_extrainfo.c index 56fd6b7592..3ec2baf1e9 100644 --- a/src/test/fuzz/fuzz_extrainfo.c +++ b/src/test/fuzz/fuzz_extrainfo.c @@ -3,6 +3,7 @@ #define ROUTERPARSE_PRIVATE #include "core/or/or.h" #include "feature/dirparse/routerparse.h" +#include "feature/dirparse/unparseable.h" #include "feature/nodelist/routerlist.h" #include "feature/relay/routerkeys.h" #include "test/fuzz/fuzzing.h" diff --git a/src/test/fuzz/fuzz_hsdescv2.c b/src/test/fuzz/fuzz_hsdescv2.c index ebab5c6ce0..1963114ca7 100644 --- a/src/test/fuzz/fuzz_hsdescv2.c +++ 
b/src/test/fuzz/fuzz_hsdescv2.c @@ -3,6 +3,7 @@ #define ROUTERPARSE_PRIVATE #include "core/or/or.h" #include "feature/dirparse/routerparse.h" +#include "feature/dirparse/unparseable.h" #include "feature/rend/rendcommon.h" #include "lib/crypt_ops/crypto_ed25519.h" #include "test/fuzz/fuzzing.h" diff --git a/src/test/fuzz/fuzz_hsdescv3.c b/src/test/fuzz/fuzz_hsdescv3.c index 4d630bf9bd..ce6dfb177a 100644 --- a/src/test/fuzz/fuzz_hsdescv3.c +++ b/src/test/fuzz/fuzz_hsdescv3.c @@ -9,6 +9,7 @@ #include "lib/crypt_ops/crypto_ed25519.h" #include "feature/hs/hs_descriptor.h" #include "feature/dirparse/routerparse.h" +#include "feature/dirparse/unparseable.h" #include "test/fuzz/fuzzing.h" diff --git a/src/test/fuzz/fuzz_iptsv2.c b/src/test/fuzz/fuzz_iptsv2.c index 6fdfec3c9f..c2147c8bae 100644 --- a/src/test/fuzz/fuzz_iptsv2.c +++ b/src/test/fuzz/fuzz_iptsv2.c @@ -3,6 +3,7 @@ #define ROUTERPARSE_PRIVATE #include "core/or/or.h" #include "feature/dirparse/routerparse.h" +#include "feature/dirparse/unparseable.h" #include "feature/rend/rendcommon.h" #include "lib/crypt_ops/crypto_ed25519.h" diff --git a/src/test/fuzz/fuzz_microdesc.c b/src/test/fuzz/fuzz_microdesc.c index 15e8b004c9..789e522af6 100644 --- a/src/test/fuzz/fuzz_microdesc.c +++ b/src/test/fuzz/fuzz_microdesc.c @@ -3,6 +3,7 @@ #define ROUTERPARSE_PRIVATE #include "core/or/or.h" #include "feature/dirparse/routerparse.h" +#include "feature/dirparse/unparseable.h" #include "feature/nodelist/microdesc.h" #include "lib/crypt_ops/crypto_ed25519.h" diff --git a/src/test/fuzz/fuzz_vrs.c b/src/test/fuzz/fuzz_vrs.c index 6b22077ae6..8fb6e6ef24 100644 --- a/src/test/fuzz/fuzz_vrs.c +++ b/src/test/fuzz/fuzz_vrs.c @@ -4,6 +4,7 @@ #define NETWORKSTATUS_PRIVATE #include "core/or/or.h" #include "feature/dirparse/routerparse.h" +#include "feature/dirparse/unparseable.h" #include "lib/memarea/memarea.h" #include "feature/nodelist/microdesc.h" #include "feature/nodelist/networkstatus.h" diff --git a/src/test/test_dir.c 
b/src/test/test_dir.c index 23c8c4a1d3..9c189ba6be 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -21,6 +21,7 @@ #define ROUTERLIST_PRIVATE #define ROUTERPARSE_PRIVATE #define ROUTER_PRIVATE +#define UNPARSEABLE_PRIVATE #define VOTEFLAGS_PRIVATE #include "core/or/or.h" @@ -52,6 +53,7 @@ #include "feature/nodelist/node_select.h" #include "feature/nodelist/routerlist.h" #include "feature/dirparse/routerparse.h" +#include "feature/dirparse/unparseable.h" #include "feature/nodelist/routerset.h" #include "feature/nodelist/torcert.h" #include "feature/relay/router.h"