tor/src/or/microdesc.c

591 lines
18 KiB
C
Raw Normal View History

/* Copyright (c) 2009-2010, The Tor Project, Inc. */
/* See LICENSE for licensing information */
#include "or.h"
2010-07-22 10:22:51 +02:00
#include "config.h"
#include "directory.h"
2010-07-23 20:04:12 +02:00
#include "microdesc.h"
#include "nodelist.h"
2010-07-23 23:23:43 +02:00
#include "routerparse.h"
#include "networkstatus.h"
#include "routerlist.h"
#include "dirserv.h"
/** A data structure to hold a bunch of cached microdescriptors.  There are
 * two active files in the cache: a "cache file" that we mmap, and a "journal
 * file" that we append to.  Periodically, we rebuild the cache file to hold
 * only the microdescriptors that we want to keep. */
struct microdesc_cache_t {
/** Map from sha256-digest to microdesc_t for every microdesc_t in the
 * cache. */
HT_HEAD(microdesc_map, microdesc_t) map;
/** Name of the cache file. */
char *cache_fname;
/** Name of the journal file. */
char *journal_fname;
/** Mmap'd contents of the cache file, or NULL if there is none. */
tor_mmap_t *cache_content;
/** Number of bytes used in the journal file.  Grows as descriptors are
 * appended to the journal, and goes back to 0 when the cache file is
 * rebuilt. */
size_t journal_len;
/** Number of bytes in descriptors removed as too old.  Accumulated by
 * microdesc_cache_clean() and zeroed by microdesc_cache_rebuild(). */
size_t bytes_dropped;
/** Total bytes of microdescriptor bodies we have added to this cache since
 * it was last cleared.  Feeds microdesc_average_size(). */
uint64_t total_len_seen;
/** Total number of microdescriptors we have added to this cache since it
 * was last cleared.  Feeds microdesc_average_size(). */
unsigned n_seen;
};
/** Helper: computes a hash of <b>md</b> to place it in a hash table.
 *
 * The hash is the XOR of the descriptor's 256-bit digest taken one
 * <b>unsigned</b>-sized word at a time.  Each word is copied out with
 * memcpy() instead of being read through a casted pointer: the digest is a
 * byte array, so reading it as <b>unsigned</b> breaks the aliasing rules and
 * is not guaranteed to be suitably aligned.  For 4-byte and 8-byte ints the
 * resulting value is the same as the old pointer-cast version produced. */
static INLINE unsigned int
_microdesc_hash(microdesc_t *md)
{
  const char *bytes = (const char *)md->digest;
  unsigned int hash = 0;
  size_t pos;

  for (pos = 0; pos + sizeof(unsigned) <= DIGEST256_LEN;
       pos += sizeof(unsigned)) {
    unsigned word;
    memcpy(&word, bytes + pos, sizeof(word));
    hash ^= word;
  }
  return hash;
}
/** Helper: returns nonzero iff <b>a</b> and <b>b</b> carry the same 256-bit
 * digest, which is what makes them "equal" for hash-table purposes. */
static INLINE int
_microdesc_eq(microdesc_t *a, microdesc_t *b)
{
  const int difference = memcmp(a->digest, b->digest, DIGEST256_LEN);
  return difference == 0;
}
/* Instantiate the "microdesc_map" hash table: its elements are microdesc_t
 * objects linked through their <b>node</b> member, hashed with
 * _microdesc_hash() and compared with _microdesc_eq().
 * NOTE(review): 0.6 is presumably the table's load factor, and
 * malloc/realloc/free its allocator hooks -- confirm against ht.h. */
HT_PROTOTYPE(microdesc_map, microdesc_t, node,
_microdesc_hash, _microdesc_eq);
HT_GENERATE(microdesc_map, microdesc_t, node,
_microdesc_hash, _microdesc_eq, 0.6,
malloc, realloc, free);
/** Write the body of <b>md</b> into <b>f</b>, with appropriate annotations.
 *
 * If <b>md</b> has a nonzero last_listed time, an "@last-listed" annotation
 * line is written first.  As a side effect, <b>md</b>-\>off is set to the
 * stream position at which the body begins.
 *
 * On success, return the total number of bytes written, and set
 * *<b>annotation_len_out</b> to the number of bytes written as
 * annotations.  On failure, log a warning and return -1. */
static ssize_t
dump_microdescriptor(FILE *f, microdesc_t *md, size_t *annotation_len_out)
{
  ssize_t r = 0;
  size_t written;

  *annotation_len_out = 0;

  /* XXXX drops unknown annotations. */
  if (md->last_listed) {
    char buf[ISO_TIME_LEN+1];
    char annotation[ISO_TIME_LEN+32];
    format_iso_time(buf, md->last_listed);
    tor_snprintf(annotation, sizeof(annotation), "@last-listed %s\n", buf);
    if (fputs(annotation, f) == EOF) {
      /* Grab errno right away, before anything else can overwrite it. */
      const int saved_errno = errno;
      log_warn(LD_DIR,
               "Couldn't write microdescriptor annotation: %s",
               strerror(saved_errno));
      return -1;
    }
    r += strlen(annotation);
    *annotation_len_out = r;
  }

  md->off = (off_t) ftell(f);
  written = fwrite(md->body, 1, md->bodylen, f);
  if (written != md->bodylen) {
    /* ferror() only yields a nonzero "error happened" flag; the reason for
     * the failure is in errno, so that is what strerror() needs. */
    const int saved_errno = errno;
    log_warn(LD_DIR,
             "Couldn't dump microdescriptor (wrote %lu out of %lu): %s",
             (unsigned long)written, (unsigned long)md->bodylen,
             strerror(saved_errno));
    return -1;
  }
  r += md->bodylen;
  return r;
}
/** Holds a pointer to the current microdesc_cache_t object, or NULL if no
 * such object has been allocated.  Set by get_microdesc_cache() on first
 * use and released by microdesc_free_all(). */
static microdesc_cache_t *the_microdesc_cache = NULL;
/** Return the module's single microdescriptor cache.  The first call
 * allocates it, gives it the file names that get_datadir_fname() returns for
 * "cached-microdescs" and "cached-microdescs.new", and reloads its contents;
 * every later call hands back that same object. */
microdesc_cache_t *
get_microdesc_cache(void)
{
  microdesc_cache_t *fresh;

  if (!PREDICT_UNLIKELY(the_microdesc_cache==NULL))
    return the_microdesc_cache;

  fresh = tor_malloc_zero(sizeof(microdesc_cache_t));
  HT_INIT(microdesc_map, &fresh->map);
  fresh->cache_fname = get_datadir_fname("cached-microdescs");
  fresh->journal_fname = get_datadir_fname("cached-microdescs.new");
  microdesc_cache_reload(fresh);

  /* The global is assigned only once the reload has finished. */
  the_microdesc_cache = fresh;
  return the_microdesc_cache;
}
/* There are three sources of microdescriptors:
1) Generated by us while acting as a directory authority.
2) Loaded from the cache on disk.
3) Downloaded.
*/
/** Parse every microdescriptor found in the text running from <b>s</b> up to
 * <b>eos</b> and hand the results to <b>cache</b>.
 *
 * <b>where</b> says where the text came from: annotations are accepted
 * unless it is SAVED_NOWHERE, and bodies are copied unless it is
 * SAVED_IN_CACHE (in which case they stay as pointers into the mmap'd cache
 * file).  <b>no_save</b> is passed through to mark the descriptors as not to
 * be written to disk.  If <b>listed_at</b> is positive, it becomes the
 * last_listed time of every parsed descriptor.
 *
 * If <b>requested_digests256</b> is non-NULL, it lists the digests we asked
 * for: any parsed descriptor whose digest is not in it is discarded, and the
 * digests that we did receive are freed and removed from the list, leaving
 * only those still outstanding.
 *
 * Return whatever microdescs_add_list_to_cache() returns for the surviving
 * descriptors: a list of those newly added. */
smartlist_t *
microdescs_add_to_cache(microdesc_cache_t *cache,
                        const char *s, const char *eos, saved_location_t where,
                        int no_save, time_t listed_at,
                        smartlist_t *requested_digests256)
{
  const int allow_annotations = (where != SAVED_NOWHERE);
  const int copy_body = (where != SAVED_IN_CACHE);
  smartlist_t *parsed;
  smartlist_t *result;

  parsed = microdescs_parse_from_string(s, eos, allow_annotations, copy_body);

  if (listed_at > 0) {
    SMARTLIST_FOREACH_BEGIN(parsed, microdesc_t *, md) {
      md->last_listed = listed_at;
    } SMARTLIST_FOREACH_END(md);
  }

  if (requested_digests256) {
    /* Map values: "wanted" means requested but not seen yet, "received"
     * means requested and seen. */
    void *const wanted = (void*)1;
    void *const received = (void*)2;
    /* XXXX actually we should just use a digest256map */
    digestmap_t *status = digestmap_new();

    SMARTLIST_FOREACH_BEGIN(requested_digests256, const char *, cp) {
      digestmap_set(status, cp, wanted);
    } SMARTLIST_FOREACH_END(cp);

    /* Discard whatever we were sent without asking for it. */
    SMARTLIST_FOREACH_BEGIN(parsed, microdesc_t *, md) {
      if (!digestmap_get(status, md->digest)) {
        log_fn(LOG_PROTOCOL_WARN, LD_DIR, "Received non-requested microcdesc");
        microdesc_free(md);
        SMARTLIST_DEL_CURRENT(parsed, md);
      } else {
        digestmap_set(status, md->digest, received);
      }
    } SMARTLIST_FOREACH_END(md);

    /* Strike the digests we did get off the caller's list. */
    SMARTLIST_FOREACH_BEGIN(requested_digests256, char *, cp) {
      if (digestmap_get(status, cp) == received) {
        tor_free(cp);
        SMARTLIST_DEL_CURRENT(requested_digests256, cp);
      }
    } SMARTLIST_FOREACH_END(cp);

    digestmap_free(status, NULL);
  }

  result = microdescs_add_list_to_cache(cache, parsed, where, no_save);
  smartlist_free(parsed);
  return result;
}
/** As microdescs_add_to_cache, but takes a list of microdescriptors instead
 * of a string to decode.
 *
 * When <b>where</b> is SAVED_NOWHERE and <b>no_save</b> is false, each new
 * descriptor is also appended to the journal file.  Descriptors already
 * present in <b>cache</b> only refresh the stored copy's last_listed time.
 * Afterwards the cache file is rebuilt if the journal has grown large, and
 * the new descriptors are handed to the nodelist when the latest consensus
 * is of the microdesc flavor.
 *
 * Frees any members of <b>descriptors</b> that it does not add; the list
 * itself stays owned by the caller, who must not use its members afterwards.
 *
 * Return a newly allocated list of the descriptors that were added, or NULL
 * if the journal could not be opened (in which case nothing is added and
 * every member of <b>descriptors</b> is freed). */
smartlist_t *
microdescs_add_list_to_cache(microdesc_cache_t *cache,
                             smartlist_t *descriptors, saved_location_t where,
                             int no_save)
{
  smartlist_t *added;
  open_file_t *open_file = NULL;
  FILE *f = NULL;

  if (where == SAVED_NOWHERE && !no_save) {
    f = start_writing_to_stdio_file(cache->journal_fname,
                                    OPEN_FLAGS_APPEND|O_BINARY,
                                    0600, &open_file);
    if (!f) {
      log_warn(LD_DIR, "Couldn't append to journal in %s: %s",
               cache->journal_fname, strerror(errno));
      /* Nothing gets added, so everything must be freed here: otherwise
       * the descriptors would leak, since callers only free the list. */
      SMARTLIST_FOREACH(descriptors, microdesc_t *, md, microdesc_free(md));
      return NULL;
    }
  }

  added = smartlist_create();
  SMARTLIST_FOREACH_BEGIN(descriptors, microdesc_t *, md) {
    microdesc_t *md2;

    /* Record where the body lives before anything can free this
     * descriptor: microdesc_free() must not try to release a body that
     * points into the mmap'd cache file. */
    md->saved_location = where;

    md2 = HT_FIND(microdesc_map, &cache->map, md);
    if (md2) {
      /* We already had this one. */
      if (md2->last_listed < md->last_listed)
        md2->last_listed = md->last_listed;
      microdesc_free(md);
      continue;
    }

    /* Okay, it's a new one. */
    if (f) {
      size_t annotation_len;
      ssize_t size = dump_microdescriptor(f, md, &annotation_len);
      if (size < 0) {
        /* dump_microdescriptor() has already logged the failure.  We are
         * not adding this descriptor, so we have to free it. */
        microdesc_free(md);
        continue;
      }
      md->saved_location = SAVED_IN_JOURNAL;
      cache->journal_len += size;
    }

    md->no_save = no_save;

    HT_INSERT(microdesc_map, &cache->map, md);
    smartlist_add(added, md);
    ++cache->n_seen;
    cache->total_len_seen += md->bodylen;
  } SMARTLIST_FOREACH_END(md);

  if (f) {
    if (finish_writing_to_file(open_file) < 0) {
      /* The descriptors stay usable in memory; they just may not have made
       * it to disk. */
      log_warn(LD_DIR, "Couldn't finish writing to journal in %s",
               cache->journal_fname);
    }
  }

  {
    /* Rebuild the cache file once the journal has grown large relative to
     * the cache file. */
    size_t old_content_len =
      cache->cache_content ? cache->cache_content->size : 0;
    if ((cache->journal_len > 16384 + old_content_len &&
         cache->journal_len > old_content_len / 2))
      microdesc_cache_rebuild(cache);
  }

  {
    networkstatus_t *ns = networkstatus_get_latest_consensus();
    if (ns && ns->flavor == FLAV_MICRODESC)
      SMARTLIST_FOREACH(added, microdesc_t *, md, nodelist_add_microdesc(md));
  }

  return added;
}
/** Empty <b>cache</b>: unlink and free every microdescriptor it holds,
 * release the hash table's storage, drop the mapping of the cache file if
 * there is one, and reset the running size statistics.  The file names are
 * left alone. */
void
microdesc_cache_clear(microdesc_cache_t *cache)
{
  microdesc_t **iter = HT_START(microdesc_map, &cache->map);

  while (iter) {
    microdesc_t *doomed = *iter;
    /* Advance first: the entry has to be out of the table before it is
     * freed. */
    iter = HT_NEXT_RMV(microdesc_map, &cache->map, iter);
    microdesc_free(doomed);
  }
  HT_CLEAR(microdesc_map, &cache->map);

  if (cache->cache_content) {
    tor_munmap_file(cache->cache_content);
    cache->cache_content = NULL;
  }

  cache->n_seen = 0;
  cache->total_len_seen = 0;
}
/** Reload the contents of <b>cache</b> from disk.  If it is empty, load it
 * for the first time.
 *
 * Everything currently held is discarded first.  Then the cache file is
 * mmap'd and parsed in place, and the journal file (if present) is read and
 * parsed.  The journal's size on disk is recorded in journal_len, so that
 * the decision about when to rebuild the cache file accounts for a journal
 * left over from an earlier run.
 *
 * Currently always returns 0; a missing or unreadable file just contributes
 * no descriptors. */
int
microdesc_cache_reload(microdesc_cache_t *cache)
{
  struct stat st;
  char *journal_content;
  smartlist_t *added;
  tor_mmap_t *mm;
  int total = 0;

  microdesc_cache_clear(cache);

  mm = cache->cache_content = tor_mmap_file(cache->cache_fname);
  if (mm) {
    added = microdescs_add_to_cache(cache, mm->data, mm->data+mm->size,
                                    SAVED_IN_CACHE, 0, -1, NULL);
    if (added) {
      total += smartlist_len(added);
      smartlist_free(added);
    }
  }

  journal_content = read_file_to_str(cache->journal_fname,
                                     RFTS_IGNORE_MISSING, &st);
  if (journal_content) {
    /* Set this before adding: adding may trigger a rebuild, which empties
     * the journal and resets journal_len to 0. */
    cache->journal_len = (size_t) st.st_size;
    added = microdescs_add_to_cache(cache, journal_content,
                                    journal_content+st.st_size,
                                    SAVED_IN_JOURNAL, 0, -1, NULL);
    if (added) {
      total += smartlist_len(added);
      smartlist_free(added);
    }
    tor_free(journal_content);
  }
  log_notice(LD_DIR, "Reloaded microdescriptor cache.  Found %d descriptors.",
             total);
  return 0;
}
/** How long, in seconds, a microdescriptor may go without being listed
 * before microdesc_cache_clean() removes it from the cache: seven days. */
#define TOLERATE_MICRODESC_AGE (7*24*60*60)
/** Remove from <b>cache</b>, and free, every microdescriptor whose
 * last_listed time is more than TOLERATE_MICRODESC_AGE seconds in the past.
 * The sizes of the removed bodies are added to the cache's bytes_dropped
 * counter.  Does nothing at all unless we have a reasonably live
 * microdesc-flavored consensus, since without one the last_listed values
 * cannot be trusted. */
void
microdesc_cache_clean(microdesc_cache_t *cache)
{
  const time_t now = time(NULL);
  time_t cutoff;
  microdesc_t **iter;
  int n_dropped = 0, n_kept = 0;
  size_t dropped_bytes = 0;

  /* If we don't know a consensus, never believe last_listed values */
  if (networkstatus_get_reasonably_live_consensus(now, FLAV_MICRODESC) == NULL)
    return;

  cutoff = now - TOLERATE_MICRODESC_AGE;

  iter = HT_START(microdesc_map, &cache->map);
  while (iter != NULL) {
    microdesc_t *md = *iter;

    if (md->last_listed >= cutoff) {
      ++n_kept;
      iter = HT_NEXT(microdesc_map, &cache->map, iter);
      continue;
    }

    /* Too old: take it out of the table, then free it. */
    ++n_dropped;
    dropped_bytes += md->bodylen;
    iter = HT_NEXT_RMV(microdesc_map, &cache->map, iter);
    microdesc_free(md);
  }

  if (!n_dropped)
    return;

  log_notice(LD_DIR, "Removed %d/%d microdescriptors as old.",
             n_dropped,n_dropped+n_kept);
  cache->bytes_dropped += dropped_bytes;
}
/** Regenerate the main cache file for <b>cache</b>, clear the journal file,
 * and update every microdesc_t in the cache with pointers to its new
 * location.
 *
 * Old descriptors are cleaned out first.  Every remaining descriptor that is
 * not marked no_save is written to a replacement cache file; the file is
 * then mmap'd and each written descriptor's body is pointed into the
 * mapping.
 *
 * Return 0 on success, or -1 if the cache file could not be opened for
 * writing or could not be mapped after descriptors were written to it. */
int
microdesc_cache_rebuild(microdesc_cache_t *cache)
{
  open_file_t *open_file;
  FILE *f;
  microdesc_t **mdp;
  smartlist_t *wrote;
  ssize_t size;
  off_t off = 0;
  int orig_size, new_size;

  log_info(LD_DIR, "Rebuilding the microdescriptor cache...");

  microdesc_cache_clean(cache);

  orig_size = (int)(cache->cache_content ? cache->cache_content->size : 0);
  orig_size += (int)cache->journal_len;

  f = start_writing_to_stdio_file(cache->cache_fname,
                                  OPEN_FLAGS_REPLACE|O_BINARY,
                                  0600, &open_file);
  if (!f)
    return -1;

  wrote = smartlist_create();

  HT_FOREACH(mdp, microdesc_map, &cache->map) {
    microdesc_t *md = *mdp;
    size_t annotation_len;
    if (md->no_save)
      continue;

    size = dump_microdescriptor(f, md, &annotation_len);
    if (size < 0) {
      /* XXX handle errors from dump_microdescriptor() */
      /* log? return -1? die? coredump the universe? */
      continue;
    }
    /* The body starts right after this descriptor's annotations. */
    md->off = off + annotation_len;
    off += size;
    if (md->saved_location != SAVED_IN_CACHE) {
      /* The heap copy is no longer needed; the body pointer is re-aimed
       * into the new mapping below. */
      tor_free(md->body);
      md->saved_location = SAVED_IN_CACHE;
    }
    smartlist_add(wrote, md);
  }

  finish_writing_to_file(open_file); /*XXX Check me.*/

  if (cache->cache_content)
    tor_munmap_file(cache->cache_content);
  cache->cache_content = tor_mmap_file(cache->cache_fname);

  if (!cache->cache_content && smartlist_len(wrote)) {
    log_err(LD_DIR, "Couldn't map file that we just wrote to %s!",
            cache->cache_fname);
    smartlist_free(wrote);
    return -1;
  }
  SMARTLIST_FOREACH_BEGIN(wrote, microdesc_t *, md) {
    tor_assert(md->saved_location == SAVED_IN_CACHE);
    md->body = (char*)cache->cache_content->data + md->off;
    tor_assert(!memcmp(md->body, "onion-key", 9));
  } SMARTLIST_FOREACH_END(md);

  smartlist_free(wrote);

  write_str_to_file(cache->journal_fname, "", 1);
  cache->journal_len = 0;
  cache->bytes_dropped = 0;

  /* If nothing was written, the check above lets a missing mapping through,
   * so cache_content can still be NULL here; don't dereference it. */
  new_size = cache->cache_content ? (int)cache->cache_content->size : 0;
  log_info(LD_DIR, "Done rebuilding microdesc cache. "
           "Saved %d bytes; %d still used.",
           orig_size-new_size, new_size);

  return 0;
}
/** Release <b>md</b> and everything it owns: its onion key, its family list
 * and the strings in it, its exit summary, and its body unless the body
 * lives in the cache file (saved_location is SAVED_IN_CACHE).  Passing NULL
 * is a no-op.
 *
 * Note: if <b>md</b> was ever inserted into the cache, it MUST have been
 * removed from the hash table before this is called. */
void
microdesc_free(microdesc_t *md)
{
  if (md == NULL)
    return;

  /* Must be removed from hash table! */

  if (md->onion_pkey)
    crypto_free_pk_env(md->onion_pkey);

  if (md->saved_location != SAVED_IN_CACHE && md->body)
    tor_free(md->body);

  if (md->family) {
    SMARTLIST_FOREACH_BEGIN(md->family, char *, member) {
      tor_free(member);
    } SMARTLIST_FOREACH_END(member);
    smartlist_free(md->family);
  }

  tor_free(md->exitsummary);
  tor_free(md);
}
/** Release everything this module holds: empty the global cache if one
 * exists, then free its file names and the cache object itself. */
void
microdesc_free_all(void)
{
  if (!the_microdesc_cache)
    return;

  microdesc_cache_clear(the_microdesc_cache);
  tor_free(the_microdesc_cache->cache_fname);
  tor_free(the_microdesc_cache->journal_fname);
  tor_free(the_microdesc_cache);
}
/** Look up the microdescriptor in <b>cache</b> whose sha256 digest equals
 * the DIGEST256_LEN bytes at <b>d</b>.  Return it, or NULL if there is no
 * such entry.  If <b>cache</b> is NULL, the global cache is used. */
microdesc_t *
microdesc_cache_lookup_by_digest256(microdesc_cache_t *cache, const char *d)
{
  /* Only the digest of the key is filled in; that is all the table's hash
   * and equality helpers look at. */
  microdesc_t key;

  if (cache == NULL)
    cache = get_microdesc_cache();

  memcpy(key.digest, d, DIGEST256_LEN);
  return HT_FIND(microdesc_map, &cache->map, &key);
}
/** Return the mean body size of the descriptors added to <b>cache</b> since
 * it was last cleared, or 512 if none have been added yet.  Used to estimate
 * the size of large downloads.  If <b>cache</b> is NULL, the global cache is
 * used. */
size_t
microdesc_average_size(microdesc_cache_t *cache)
{
  microdesc_cache_t *source = cache;

  if (source == NULL)
    source = get_microdesc_cache();

  if (source->n_seen == 0)
    return 512;

  return (size_t)(source->total_len_seen / source->n_seen);
}
/** Build a list of the sha256 digests of the microdescriptors that <b>ns</b>
 * lists but that <b>cache</b> does not hold.  A digest is left out if it
 * appears in <b>skip</b> (when <b>skip</b> is non-NULL), or, when
 * <b>downloadable_only</b> is set, if its download status says it is not
 * ready to be fetched.
 *
 * The list's members point into <b>ns</b>: free the list itself, but not its
 * members.  <b>ns</b> must be a microdesc-flavored consensus. */
smartlist_t *
microdesc_list_missing_digest256(networkstatus_t *ns, microdesc_cache_t *cache,
                                 int downloadable_only, digestmap_t *skip)
{
  smartlist_t *missing = smartlist_create();
  const time_t now = time(NULL);

  tor_assert(ns->flavor == FLAV_MICRODESC);

  SMARTLIST_FOREACH_BEGIN(ns->routerstatus_list, routerstatus_t *, rs) {
    /* XXXX Also skip if we're a noncache and wouldn't use this router.
     * XXXX NM Microdesc
     */
    if (!microdesc_cache_lookup_by_digest256(cache, rs->descriptor_digest) &&
        (!downloadable_only ||
         download_status_is_ready(&rs->dl_status, now,
                                  MAX_MICRODESC_DOWNLOAD_FAILURES)) &&
        (!skip || !digestmap_get(skip, rs->descriptor_digest))) {
      smartlist_add(missing, rs->descriptor_digest);
    }
  } SMARTLIST_FOREACH_END(rs);

  return missing;
}
/** Launch downloads for the microdescriptors that the current microdesc
 * consensus lists, that we do not have, that are ready to be downloaded, and
 * that are not already being fetched.  Does nothing if directory fetches
 * should be delayed, if we are too idle to fetch descriptors, if there is no
 * reasonably live microdesc consensus, or if we are not caching directory
 * information. */
void
update_microdesc_downloads(time_t now)
{
  or_options_t *options = get_options();
  networkstatus_t *consensus;
  digestmap_t *in_flight;
  smartlist_t *wanted;

  if (should_delay_dir_fetches(options) ||
      directory_too_idle_to_fetch_descriptors(options, now))
    return;

  consensus = networkstatus_get_reasonably_live_consensus(now, FLAV_MICRODESC);
  if (consensus == NULL)
    return;

  /* Right now, only caches fetch microdescriptors.
   * XXXX NM Microdescs */
  if (!directory_caches_dir_info(options))
    return;

  /* Collect the digests already being fetched so they are not requested a
   * second time. */
  in_flight = digestmap_new();
  list_pending_microdesc_downloads(in_flight);

  wanted = microdesc_list_missing_digest256(consensus,
                                            get_microdesc_cache(),
                                            1,
                                            in_flight);
  digestmap_free(in_flight, NULL);

  launch_descriptor_downloads(DIR_PURPOSE_FETCH_MICRODESC,
                              wanted, NULL, now);

  smartlist_free(wanted);
}
/** For every microdescriptor in the cache that the current reasonably live
 * microdesc consensus lists, move its last_listed time forward to the
 * consensus's valid_after time if that is later.  Does nothing when there is
 * no such consensus. */
void
update_microdescs_from_networkstatus(time_t now)
{
  microdesc_cache_t *cache = get_microdesc_cache();
  networkstatus_t *ns =
    networkstatus_get_reasonably_live_consensus(now, FLAV_MICRODESC);

  if (ns == NULL)
    return;

  tor_assert(ns->flavor == FLAV_MICRODESC);

  SMARTLIST_FOREACH_BEGIN(ns->routerstatus_list, routerstatus_t *, rs) {
    microdesc_t *found =
      microdesc_cache_lookup_by_digest256(cache, rs->descriptor_digest);
    if (!found)
      continue;
    if (found->last_listed < ns->valid_after)
      found->last_listed = ns->valid_after;
  } SMARTLIST_FOREACH_END(rs);
}
Initial conversion to use node_t throughout our codebase. A node_t is an abstraction over routerstatus_t, routerinfo_t, and microdesc_t. It should try to present a consistent interface to all of them. There should be a node_t for a server whenever there is * A routerinfo_t for it in the routerlist * A routerstatus_t in the current_consensus. (note that a microdesc_t alone isn't enough to make a node_t exist, since microdescriptors aren't usable on their own.) There are three ways to get a node_t right now: looking it up by ID, looking it up by nickname, and iterating over the whole list of microdescriptors. All (or nearly all) functions that are supposed to return "a router" -- especially those used in building connections and circuits -- should return a node_t, not a routerinfo_t or a routerstatus_t. A node_t should hold all the *mutable* flags about a node. This patch moves the is_foo flags from routerinfo_t into node_t. The flags in routerstatus_t remain, but they get set from the consensus and should not change. Some other highlights of this patch are: * Looking up routerinfo and routerstatus by nickname is now unified and based on the "look up a node by nickname" function. This tries to look only at the values from current consensus, and not get confused by the routerinfo_t->is_named flag, which could get set for other weird reasons. This changes the behavior of how authorities (when acting as clients) deal with nodes that have been listed by nickname. * I tried not to artificially increase the size of the diff here by moving functions around. As a result, some functions that now operate on nodes are now in the wrong file -- they should get moved to nodelist.c once this refactoring settles down. This moving should happen as part of a patch that moves functions AND NOTHING ELSE. * Some old code is now left around inside #if 0/1 blocks, and should get removed once I've verified that I don't want it sitting around to see how we used to do things. 
There are still some unimplemented functions: these are flagged with "UNIMPLEMENTED_NODELIST()." I'll work on filling in the implementation here, piece by piece. I wish this patch could have been smaller, but there did not seem to be any piece of it that was independent from the rest. Moving flags forces many functions that once returned routerinfo_t * to return node_t *, which forces their friends to change, and so on.
2010-09-29 21:00:41 +02:00