/* Copyright (c) 2009-2010, The Tor Project, Inc. */
/* See LICENSE for licensing information */

#include "or.h"
#include "config.h"
#include "directory.h"
#include "microdesc.h"
#include "routerparse.h"
#include "networkstatus.h"
#include "routerlist.h"
#include "dirserv.h"

/** A data structure to hold a bunch of cached microdescriptors.  There are
 * two active files in the cache: a "cache file" that we mmap, and a "journal
 * file" that we append to.  Periodically, we rebuild the cache file to hold
 * only the microdescriptors that we want to keep. */
struct microdesc_cache_t {
  /** Map from sha256-digest to microdesc_t for every microdesc_t in the
   * cache. */
  HT_HEAD(microdesc_map, microdesc_t) map;

  /** Name of the cache file. */
  char *cache_fname;
  /** Name of the journal file. */
  char *journal_fname;
  /** Mmap'd contents of the cache file, or NULL if there is none. */
  tor_mmap_t *cache_content;
  /** Number of bytes used in the journal file. */
  size_t journal_len;
  /** Number of bytes in descriptors removed as too old. */
  size_t bytes_dropped;

  /** Total bytes of microdescriptor bodies we have added to this cache */
  uint64_t total_len_seen;
  /** Total number of microdescriptors we have added to this cache */
  unsigned n_seen;
};

/** Helper: computes a hash of md to place it in a hash table.
 *
 * The hash is the XOR of the digest taken as a sequence of unsigned words.
 * The words are copied out with memcpy() instead of being read through a
 * cast pointer: the digest is stored as raw bytes, so dereferencing it as
 * unsigned would assume an alignment it is not guaranteed to have and would
 * break the strict-aliasing rules.  The resulting value is the same. */
static INLINE unsigned int
_microdesc_hash(microdesc_t *md)
{
#if SIZEOF_INT == 4
  unsigned d[8];
  memcpy(d, md->digest, sizeof(d));
  return d[0] ^ d[1] ^ d[2] ^ d[3] ^ d[4] ^ d[5] ^ d[6] ^ d[7];
#else
  unsigned d[4];
  memcpy(d, md->digest, sizeof(d));
  return d[0] ^ d[1] ^ d[2] ^ d[3];
#endif
}

/** Helper: compares a and b for equality for hash-table purposes.  Two
 * microdescriptors are equal when their DIGEST256_LEN-byte digests match. */
static INLINE int
_microdesc_eq(microdesc_t *a, microdesc_t *b)
{
  return !memcmp(a->digest, b->digest, DIGEST256_LEN);
}

HT_PROTOTYPE(microdesc_map, microdesc_t, node,
             _microdesc_hash, _microdesc_eq);
HT_GENERATE(microdesc_map, microdesc_t, node,
             _microdesc_hash, _microdesc_eq, 0.6,
             malloc, realloc, free);

/** Write the body of md into f, with appropriate annotations.
* On success, return the total number of bytes written, and set * *annotation_len_out to the number of bytes written as * annotations. */ static ssize_t dump_microdescriptor(FILE *f, microdesc_t *md, size_t *annotation_len_out) { ssize_t r = 0; size_t written; /* XXXX drops unkown annotations. */ if (md->last_listed) { char buf[ISO_TIME_LEN+1]; char annotation[ISO_TIME_LEN+32]; format_iso_time(buf, md->last_listed); tor_snprintf(annotation, sizeof(annotation), "@last-listed %s\n", buf); fputs(annotation, f); r += strlen(annotation); *annotation_len_out = r; } else { *annotation_len_out = 0; } md->off = (off_t) ftell(f); written = fwrite(md->body, 1, md->bodylen, f); if (written != md->bodylen) { log_warn(LD_DIR, "Couldn't dump microdescriptor (wrote %lu out of %lu): %s", (unsigned long)written, (unsigned long)md->bodylen, strerror(ferror(f))); return -1; } r += md->bodylen; return r; } /** Holds a pointer to the current microdesc_cache_t object, or NULL if no * such object has been allocated. */ static microdesc_cache_t *the_microdesc_cache = NULL; /** Return a pointer to the microdescriptor cache, loading it if necessary. */ microdesc_cache_t * get_microdesc_cache(void) { if (PREDICT_UNLIKELY(the_microdesc_cache==NULL)) { microdesc_cache_t *cache = tor_malloc_zero(sizeof(microdesc_cache_t)); HT_INIT(microdesc_map, &cache->map); cache->cache_fname = get_datadir_fname("cached-microdescs"); cache->journal_fname = get_datadir_fname("cached-microdescs.new"); microdesc_cache_reload(cache); the_microdesc_cache = cache; } return the_microdesc_cache; } /* There are three sources of microdescriptors: 1) Generated by us while acting as a directory authority. 2) Loaded from the cache on disk. 3) Downloaded. */ /** Decode the microdescriptors from the string starting at s and * ending at eos, and store them in cache. If no-save, * mark them as non-writable to disk. If where is SAVED_IN_CACHE, * leave their bodies as pointers to the mmap'd cache. 
If where is * SAVED_NOWHERE, do not allow annotations. If listed_at is positive, * set the last_listed field of every microdesc to listed_at. If * requested_digests is non-null, then it contains a list of digests we mean * to allow, so we should reject any non-requested microdesc with a different * digest, and alter the list to contain only the digests of those microdescs * we didn't find. * Return a list of the added microdescriptors. */ smartlist_t * microdescs_add_to_cache(microdesc_cache_t *cache, const char *s, const char *eos, saved_location_t where, int no_save, time_t listed_at, smartlist_t *requested_digests256) { smartlist_t *descriptors, *added; const int allow_annotations = (where != SAVED_NOWHERE); const int copy_body = (where != SAVED_IN_CACHE); descriptors = microdescs_parse_from_string(s, eos, allow_annotations, copy_body); if (listed_at > 0) { SMARTLIST_FOREACH(descriptors, microdesc_t *, md, md->last_listed = listed_at); } if (requested_digests256) { digestmap_t *requested; /* XXXX actuqlly we should just use a digest256map */ requested = digestmap_new(); SMARTLIST_FOREACH(requested_digests256, const char *, cp, digestmap_set(requested, cp, (void*)1)); SMARTLIST_FOREACH_BEGIN(descriptors, microdesc_t *, md) { if (digestmap_get(requested, md->digest)) { digestmap_set(requested, md->digest, (void*)2); } else { log_fn(LOG_PROTOCOL_WARN, LD_DIR, "Received non-requested microcdesc"); microdesc_free(md); SMARTLIST_DEL_CURRENT(descriptors, md); } } SMARTLIST_FOREACH_END(md); SMARTLIST_FOREACH_BEGIN(requested_digests256, char *, cp) { if (digestmap_get(requested, cp) == (void*)2) { tor_free(cp); SMARTLIST_DEL_CURRENT(requested_digests256, cp); } } SMARTLIST_FOREACH_END(cp); digestmap_free(requested, NULL); } added = microdescs_add_list_to_cache(cache, descriptors, where, no_save); smartlist_free(descriptors); return added; } /* As microdescs_add_to_cache, but takes a list of micrdescriptors instead of * a string to encode. 
Frees any members of descriptors that it does * not add. */ smartlist_t * microdescs_add_list_to_cache(microdesc_cache_t *cache, smartlist_t *descriptors, saved_location_t where, int no_save) { smartlist_t *added; open_file_t *open_file = NULL; FILE *f = NULL; // int n_added = 0; ssize_t size = 0; if (where == SAVED_NOWHERE && !no_save) { f = start_writing_to_stdio_file(cache->journal_fname, OPEN_FLAGS_APPEND|O_BINARY, 0600, &open_file); if (!f) { log_warn(LD_DIR, "Couldn't append to journal in %s: %s", cache->journal_fname, strerror(errno)); return NULL; } } added = smartlist_create(); SMARTLIST_FOREACH_BEGIN(descriptors, microdesc_t *, md) { microdesc_t *md2; md2 = HT_FIND(microdesc_map, &cache->map, md); if (md2) { /* We already had this one. */ if (md2->last_listed < md->last_listed) md2->last_listed = md->last_listed; microdesc_free(md); continue; } /* Okay, it's a new one. */ if (f) { size_t annotation_len; size = dump_microdescriptor(f, md, &annotation_len); if (size < 0) { /* XXX handle errors from dump_microdescriptor() */ /* log? return -1? die? coredump the universe? */ continue; } md->saved_location = SAVED_IN_JOURNAL; cache->journal_len += size; } else { md->saved_location = where; } md->no_save = no_save; HT_INSERT(microdesc_map, &cache->map, md); smartlist_add(added, md); ++cache->n_seen; cache->total_len_seen += md->bodylen; } SMARTLIST_FOREACH_END(md); if (f) finish_writing_to_file(open_file); /*XXX Check me.*/ { size_t old_content_len = cache->cache_content ? cache->cache_content->size : 0; if ((cache->journal_len > 16384 + old_content_len && cache->journal_len > old_content_len / 2)) microdesc_cache_rebuild(cache); } return added; } /** Remove every microdescriptor in cache. 
*/ void microdesc_cache_clear(microdesc_cache_t *cache) { microdesc_t **entry, **next; for (entry = HT_START(microdesc_map, &cache->map); entry; entry = next) { microdesc_t *md = *entry; next = HT_NEXT_RMV(microdesc_map, &cache->map, entry); microdesc_free(md); } HT_CLEAR(microdesc_map, &cache->map); if (cache->cache_content) { tor_munmap_file(cache->cache_content); cache->cache_content = NULL; } cache->total_len_seen = 0; cache->n_seen = 0; } /** Reload the contents of cache from disk. If it is empty, load it * for the first time. Return 0 on success, -1 on failure. */ int microdesc_cache_reload(microdesc_cache_t *cache) { struct stat st; char *journal_content; smartlist_t *added; tor_mmap_t *mm; int total = 0; microdesc_cache_clear(cache); mm = cache->cache_content = tor_mmap_file(cache->cache_fname); if (mm) { added = microdescs_add_to_cache(cache, mm->data, mm->data+mm->size, SAVED_IN_CACHE, 0, -1, NULL); if (added) { total += smartlist_len(added); smartlist_free(added); } } journal_content = read_file_to_str(cache->journal_fname, RFTS_IGNORE_MISSING, &st); if (journal_content) { added = microdescs_add_to_cache(cache, journal_content, journal_content+st.st_size, SAVED_IN_JOURNAL, 0, -1, NULL); if (added) { total += smartlist_len(added); smartlist_free(added); } tor_free(journal_content); } log_notice(LD_DIR, "Reloaded microdescriptor cache. 
Found %d descriptors.", total); return 0; } /** DOCDOC */ #define TOLERATE_MICRODESC_AGE (7*24*60*60) /** DOCDOC */ void microdesc_cache_clean(microdesc_cache_t *cache) { networkstatus_t *consensus; time_t cutoff; microdesc_t **mdp, *victim; int dropped=0, kept=0; size_t bytes_dropped = 0; time_t now = time(NULL); /* If we don't know a consensus, never believe last_listed values */ consensus = networkstatus_get_reasonably_live_consensus(now, FLAV_MICRODESC); if (consensus == NULL) return; cutoff = now - TOLERATE_MICRODESC_AGE; for (mdp = HT_START(microdesc_map, &cache->map); mdp != NULL; ) { if ((*mdp)->last_listed < cutoff) { ++dropped; victim = *mdp; mdp = HT_NEXT_RMV(microdesc_map, &cache->map, mdp); bytes_dropped += victim->bodylen; microdesc_free(victim); } else { ++kept; mdp = HT_NEXT(microdesc_map, &cache->map, mdp); } } if (dropped) { log_notice(LD_DIR, "Removed %d/%d microdescriptors as old.", dropped,dropped+kept); cache->bytes_dropped += bytes_dropped; } } /** Regenerate the main cache file for cache, clear the journal file, * and update every microdesc_t in the cache with pointers to its new * location. */ int microdesc_cache_rebuild(microdesc_cache_t *cache) { open_file_t *open_file; FILE *f; microdesc_t **mdp; smartlist_t *wrote; ssize_t size; off_t off = 0; int orig_size, new_size; log_info(LD_DIR, "Rebuilding the microdescriptor cache..."); microdesc_cache_clean(cache); orig_size = (int)(cache->cache_content ? cache->cache_content->size : 0); orig_size += (int)cache->journal_len; f = start_writing_to_stdio_file(cache->cache_fname, OPEN_FLAGS_REPLACE|O_BINARY, 0600, &open_file); if (!f) return -1; wrote = smartlist_create(); HT_FOREACH(mdp, microdesc_map, &cache->map) { microdesc_t *md = *mdp; size_t annotation_len; if (md->no_save) continue; size = dump_microdescriptor(f, md, &annotation_len); if (size < 0) { /* XXX handle errors from dump_microdescriptor() */ /* log? return -1? die? coredump the universe? 
*/ continue; } md->off = off + annotation_len; off += size; if (md->saved_location != SAVED_IN_CACHE) { tor_free(md->body); md->saved_location = SAVED_IN_CACHE; } smartlist_add(wrote, md); } finish_writing_to_file(open_file); /*XXX Check me.*/ if (cache->cache_content) tor_munmap_file(cache->cache_content); cache->cache_content = tor_mmap_file(cache->cache_fname); if (!cache->cache_content && smartlist_len(wrote)) { log_err(LD_DIR, "Couldn't map file that we just wrote to %s!", cache->cache_fname); smartlist_free(wrote); return -1; } SMARTLIST_FOREACH_BEGIN(wrote, microdesc_t *, md) { tor_assert(md->saved_location == SAVED_IN_CACHE); md->body = (char*)cache->cache_content->data + md->off; tor_assert(!memcmp(md->body, "onion-key", 9)); } SMARTLIST_FOREACH_END(md); smartlist_free(wrote); write_str_to_file(cache->journal_fname, "", 1); cache->journal_len = 0; cache->bytes_dropped = 0; new_size = (int)cache->cache_content->size; log_info(LD_DIR, "Done rebuilding microdesc cache. " "Saved %d bytes; %d still used.", orig_size-new_size, new_size); return 0; } /** Deallocate a single microdescriptor. Note: the microdescriptor MUST have * previously been removed from the cache if it had ever been inserted. */ void microdesc_free(microdesc_t *md) { if (!md) return; /* Must be removed from hash table! */ if (md->onion_pkey) crypto_free_pk_env(md->onion_pkey); if (md->body && md->saved_location != SAVED_IN_CACHE) tor_free(md->body); if (md->family) { SMARTLIST_FOREACH(md->family, char *, cp, tor_free(cp)); smartlist_free(md->family); } tor_free(md->exitsummary); tor_free(md); } /** Free all storage held in the microdesc.c module. */ void microdesc_free_all(void) { if (the_microdesc_cache) { microdesc_cache_clear(the_microdesc_cache); tor_free(the_microdesc_cache->cache_fname); tor_free(the_microdesc_cache->journal_fname); tor_free(the_microdesc_cache); } } /** If there is a microdescriptor in cache whose sha256 digest is * d, return it. Otherwise return NULL. 
*/ microdesc_t * microdesc_cache_lookup_by_digest256(microdesc_cache_t *cache, const char *d) { microdesc_t *md, search; if (!cache) cache = get_microdesc_cache(); memcpy(search.digest, d, DIGEST256_LEN); md = HT_FIND(microdesc_map, &cache->map, &search); return md; } /** Return the mean size of decriptors added to cache since it was last * cleared. Used to estimate the size of large downloads. */ size_t microdesc_average_size(microdesc_cache_t *cache) { if (!cache) cache = get_microdesc_cache(); if (!cache->n_seen) return 512; return (size_t)(cache->total_len_seen / cache->n_seen); } /** Return a smartlist of all the sha256 digest of the microdescriptors that * are listed in ns but not present in cache. Returns pointers * to internals of ns; you should not free the members of the resulting * smartlist. Omit all microdescriptors whose digest appear in skip. */ smartlist_t * microdesc_list_missing_digest256(networkstatus_t *ns, microdesc_cache_t *cache, int downloadable_only, digestmap_t *skip) { smartlist_t *result = smartlist_create(); time_t now = time(NULL); tor_assert(ns->flavor == FLAV_MICRODESC); SMARTLIST_FOREACH_BEGIN(ns->routerstatus_list, routerstatus_t *, rs) { if (microdesc_cache_lookup_by_digest256(cache, rs->descriptor_digest)) continue; if (downloadable_only && !download_status_is_ready(&rs->dl_status, now, MAX_MICRODESC_DOWNLOAD_FAILURES)) continue; if (skip && digestmap_get(skip, rs->descriptor_digest)) continue; /* XXXX Also skip if we're a noncache and wouldn't use this router. 
* XXXX NM Microdesc */ smartlist_add(result, rs->descriptor_digest); } SMARTLIST_FOREACH_END(rs); return result; } /** DOCDOC */ void update_microdesc_downloads(time_t now) { or_options_t *options = get_options(); networkstatus_t *consensus; smartlist_t *missing; digestmap_t *pending; if (should_delay_dir_fetches(options)) return; if (directory_too_idle_to_fetch_descriptors(options, now)) return; consensus = networkstatus_get_reasonably_live_consensus(now, FLAV_MICRODESC); if (!consensus) return; if (!directory_caches_dir_info(options)) { /* Right now, only caches fetch microdescriptors. * XXXX NM Microdescs */ return; } pending = digestmap_new(); list_pending_microdesc_downloads(pending); missing = microdesc_list_missing_digest256(consensus, get_microdesc_cache(), 1, pending); digestmap_free(pending, NULL); launch_descriptor_downloads(DIR_PURPOSE_FETCH_MICRODESC, missing, NULL, now); smartlist_free(missing); } /** DOCDOC */ void update_microdescs_from_networkstatus(time_t now) { microdesc_cache_t *cache = get_microdesc_cache(); microdesc_t *md; networkstatus_t *ns = networkstatus_get_reasonably_live_consensus(now, FLAV_MICRODESC); if (! ns) return; tor_assert(ns->flavor == FLAV_MICRODESC); SMARTLIST_FOREACH_BEGIN(ns->routerstatus_list, routerstatus_t *, rs) { md = microdesc_cache_lookup_by_digest256(cache, rs->descriptor_digest); if (md && ns->valid_after > md->last_listed) md->last_listed = ns->valid_after; } SMARTLIST_FOREACH_END(rs); }