Define a "storagedir" abstraction to hold numerous similar files

We could use one of these for holding "junk" descriptors and
unparseable things -- but we'll _need_ it for having cached
consensuses and diffs between them.
This commit is contained in:
Nick Mathewson 2017-03-10 13:22:01 -05:00
parent 118d7018d0
commit ee253e392a
8 changed files with 723 additions and 0 deletions

5
changes/storagedir Normal file
View File

@ -0,0 +1,5 @@
o Minor features (infrastructure, seccomp2 sandbox):
- We now have a document storage backend compatible with the Linux
seccomp2 sandbox. The long-term plan is to use this backend for
consensus documents and for storing unparseable directory
material. Closes ticket 21645.

View File

@ -93,6 +93,7 @@ LIBOR_A_SRC = \
src/common/util_format.c \
src/common/util_process.c \
src/common/sandbox.c \
src/common/storagedir.c \
src/common/workqueue.c \
$(libor_extra_source) \
$(threads_impl_source) \
@ -157,6 +158,7 @@ COMMONHEADERS = \
src/common/procmon.h \
src/common/pubsub.h \
src/common/sandbox.h \
src/common/storagedir.h \
src/common/testsupport.h \
src/common/timers.h \
src/common/torgzip.h \

406
src/common/storagedir.c Normal file
View File

@ -0,0 +1,406 @@
/* Copyright (c) 2017, The Tor Project, Inc. */
/* See LICENSE for licensing information */
#include "container.h"
#include "compat.h"
#include "sandbox.h"
#include "storagedir.h"
#include "torlog.h"
#include "util.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
/** Lowest number used as a filename inside a storage directory: both the
 * sandbox registration loop and the search for an unused filename in this
 * file start counting from this value. */
#define FNAME_MIN_NUM 1000
/** A storage_dir_t represents a directory full of similar cached
 * files. Filenames are decimal integers. Files can be cleaned as needed
 * to limit total disk usage. */
struct storage_dir_t {
/** Directory holding the files for this storagedir. */
char *directory;
/** Either NULL, or a directory listing of the directory (as a smartlist
 * of strings). NULL means the directory has not been scanned yet, or
 * that the last scan failed. */
smartlist_t *contents;
/** The largest number of non-temporary files we'll place in the
 * directory. Filenames are chosen from the range
 * [FNAME_MIN_NUM, FNAME_MIN_NUM + max_files). */
int max_files;
/** If true, then 'usage' has been computed. */
int usage_known;
/** The total number of bytes used in this directory. Only meaningful
 * while 'usage_known' is true. */
uint64_t usage;
};
/**
 * Open the storage directory at <b>dirname</b> (asking check_private_dir()
 * to create it if needed) and return a freshly allocated handle that may
 * hold up to <b>max_files</b> files.
 *
 * Return NULL if check_private_dir() reports a problem. The directory is
 * not scanned here; that happens lazily on first use.
 */
storage_dir_t *
storage_dir_new(const char *dirname, int max_files)
{
  storage_dir_t *result;

  if (check_private_dir(dirname, CPD_CREATE, NULL) < 0) {
    return NULL;
  }

  /* Zero-filled: no listing yet, and usage is not known. */
  result = tor_malloc_zero(sizeof(*result));
  result->max_files = max_files;
  result->directory = tor_strdup(dirname);
  return result;
}
/**
 * Release every in-memory resource held by <b>d</b>: the cached listing
 * (and each name in it), the directory path, and the handle itself.
 * Files on disk are left alone. Does nothing if <b>d</b> is NULL.
 */
void
storage_dir_free(storage_dir_t *d)
{
  if (!d)
    return;

  if (d->contents != NULL) {
    SMARTLIST_FOREACH_BEGIN(d->contents, char *, entry) {
      tor_free(entry);
    } SMARTLIST_FOREACH_END(entry);
    smartlist_free(d->contents);
  }
  tor_free(d->directory);
  tor_free(d);
}
/**
 * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
 * operations that <b>d</b> will need.
 *
 * For every filename that <b>d</b> may ever use -- the numbers from
 * FNAME_MIN_NUM up to (but not including) FNAME_MIN_NUM + max_files --
 * this allows opening and stat()ing both the file and its ".tmp"
 * counterpart, and renaming the ".tmp" file onto the final name.
 *
 * The presence of this function is why we need an upper limit on the
 * number of files in a storage_dir_t: we need to approve file
 * operations one by one.
 *
 * Return 0 if the accumulated result of all the sandbox calls is zero,
 * and -1 otherwise.
 */
int
storage_dir_register_with_sandbox(storage_dir_t *d, sandbox_cfg_t **cfg)
{
  int problems = 0;
  int idx;
  for (idx = FNAME_MIN_NUM; idx < FNAME_MIN_NUM + d->max_files; ++idx) {
    char *path = NULL, *tmppath = NULL;
    tor_asprintf(&path, "%s/%d", d->directory, idx);
    tor_asprintf(&tmppath, "%s/%d.tmp", d->directory, idx);

    /* The sandbox_cfg_allow_*() functions take ownership of the strings
     * they receive. Give each call its own copy, so that no string is
     * shared between rules or freed out from under the sandbox below. */
    problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(path));
    problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(tmppath));
    problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(path));
    problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(tmppath));
    problems += sandbox_cfg_allow_rename(cfg,
                                         tor_strdup(tmppath),
                                         tor_strdup(path));

    tor_free(path);
    tor_free(tmppath);
  }
  return problems ? -1 : 0;
}
/**
 * Delete every file listed in <b>d</b> whose name ends with ".tmp", and
 * drop the names of the successfully deleted files from the listing.
 * Files that cannot be unlinked are logged and left in the listing.
 *
 * Does nothing unless the contents field of <b>d</b> is set. Otherwise,
 * always marks the cached usage as unknown.
 */
static void
storage_dir_clean_tmpfiles(storage_dir_t *d)
{
  if (d->contents == NULL)
    return;

  SMARTLIST_FOREACH_BEGIN(d->contents, char *, name) {
    if (strcmpend(name, ".tmp") != 0)
      continue;

    char *fullpath = NULL;
    tor_asprintf(&fullpath, "%s/%s", d->directory, name);
    if (unlink(sandbox_intern_string(fullpath)) == 0) {
      /* Gone from disk: forget the name too. */
      SMARTLIST_DEL_CURRENT(d->contents, name);
      tor_free(name);
    } else {
      log_warn(LD_FS, "Unable to unlink %s", escaped(fullpath));
    }
    tor_free(fullpath);
  } SMARTLIST_FOREACH_END(name);

  d->usage_known = 0;
}
/**
 * Throw away whatever <b>d</b> currently believes about its directory and
 * list the directory again, removing leftover ".tmp" files on the way.
 * The cached usage is reset and marked unknown.
 *
 * Return 0 on success. Return -1 if the directory cannot be listed; in
 * that case the contents field is left NULL.
 */
static int
storage_dir_rescan(storage_dir_t *d)
{
  smartlist_t *stale = d->contents;
  if (stale != NULL) {
    SMARTLIST_FOREACH(stale, char *, cp, tor_free(cp));
    smartlist_free(stale);
  }

  d->usage_known = 0;
  d->usage = 0;

  d->contents = tor_listdir(d->directory);
  if (d->contents == NULL)
    return -1;

  storage_dir_clean_tmpfiles(d);
  return 0;
}
/**
 * Return the list of filenames currently known to be in <b>d</b>, scanning
 * the directory first if that has not been done yet. The list belongs to
 * <b>d</b>. The result is NULL if the scan fails.
 */
const smartlist_t *
storage_dir_list(storage_dir_t *d)
{
  if (d->contents == NULL) {
    /* A failed scan leaves contents NULL, which is what we then return. */
    (void) storage_dir_rescan(d);
  }
  return d->contents;
}
/**
 * Return the total number of bytes used for storage in <b>d</b>.
 *
 * The total is computed by stat()ing every listed file (files that cannot
 * be stat()ed contribute nothing) and is then cached until something
 * invalidates it.
 *
 * If the directory cannot be listed, return 0 and leave the cache
 * untouched, so that a later call tries again.
 */
uint64_t
storage_dir_get_usage(storage_dir_t *d)
{
  if (d->usage_known)
    return d->usage;

  /* storage_dir_list() yields NULL when the directory scan fails; we must
   * not iterate over that. */
  const smartlist_t *names = storage_dir_list(d);
  if (names == NULL)
    return 0;

  uint64_t total = 0;
  SMARTLIST_FOREACH_BEGIN(names, const char *, cp) {
    char *path = NULL;
    struct stat st;
    tor_asprintf(&path, "%s/%s", d->directory, cp);
    if (stat(sandbox_intern_string(path), &st) == 0) {
      total += st.st_size;
    }
    tor_free(path);
  } SMARTLIST_FOREACH_END(cp);

  d->usage = total;
  d->usage_known = 1;
  return d->usage;
}
/** Map the file called <b>fname</b> inside <b>d</b> into memory, and
 * return whatever tor_mmap_file() produces for its full path. */
tor_mmap_t *
storage_dir_map(storage_dir_t *d, const char *fname)
{
  tor_mmap_t *mapping;
  char *fullpath = NULL;

  tor_asprintf(&fullpath, "%s/%s", d->directory, fname);
  mapping = tor_mmap_file(fullpath);
  tor_free(fullpath);
  return mapping;
}
/** Load the file called <b>fname</b> inside <b>d</b> with
 * read_file_to_str() and return the resulting buffer. Pass RFTS_BIN to
 * that function when <b>bin</b> is true.
 *
 * When the buffer is non-NULL and <b>sz_out</b> is non-NULL, set
 * *<b>sz_out</b> to the file size reported by stat. */
uint8_t *
storage_dir_read(storage_dir_t *d, const char *fname, int bin, size_t *sz_out)
{
  struct stat info;
  char *fullpath = NULL;
  char *data;

  tor_asprintf(&fullpath, "%s/%s", d->directory, fname);
  data = read_file_to_str(fullpath, bin ? RFTS_BIN : 0, &info);
  tor_free(fullpath);

  if (data != NULL && sz_out != NULL)
    *sz_out = info.st_size;
  return (uint8_t *) data;
}
/** Helper: return a newly allocated string holding the lowest number in
 * [FNAME_MIN_NUM, FNAME_MIN_NUM + max_files) that is not already a
 * filename in the listing of <b>d</b>. Scans the directory first if no
 * listing is cached.
 *
 * Return NULL if the scan fails or every candidate name is taken. */
static char *
find_unused_fname(storage_dir_t *d)
{
  if (d->contents == NULL && storage_dir_rescan(d) < 0)
    return NULL;

  const int limit = FNAME_MIN_NUM + d->max_files;
  int candidate;
  /* Every candidate is searched for in the whole listing, so filling a
   * directory costs quadratic time overall. Disk writes are far more
   * expensive, so this should not matter much. */
  for (candidate = FNAME_MIN_NUM; candidate < limit; ++candidate) {
    char name[16];
    tor_snprintf(name, sizeof(name), "%d", candidate);
    if (smartlist_contains_string(d->contents, name))
      continue;
    return tor_strdup(name);
  }
  return NULL;
}
/** Store the <b>length</b> bytes at <b>data</b> in a new file inside
 * <b>d</b>, under a filename that is not yet in use. <b>binary</b> is
 * passed through to write_bytes_to_file().
 *
 * On success, return 0, update the cached listing and usage, and (if
 * <b>fname_out</b> is non-NULL) set *<b>fname_out</b> to a newly allocated
 * copy of the chosen filename. Return -1 if no filename is available;
 * otherwise return whatever write_bytes_to_file() returned. */
int
storage_dir_save_bytes_to_file(storage_dir_t *d,
                               const uint8_t *data,
                               size_t length,
                               int binary,
                               char **fname_out)
{
  char *name = find_unused_fname(d);
  if (name == NULL)
    return -1;

  char *fullpath = NULL;
  tor_asprintf(&fullpath, "%s/%s", d->directory, name);
  const int rv = write_bytes_to_file(fullpath, (const char *)data, length,
                                     binary);
  tor_free(fullpath);

  if (rv == 0) {
    /* Keep the in-memory view in step with what is now on disk. */
    if (d->contents)
      smartlist_add(d->contents, tor_strdup(name));
    if (fname_out)
      *fname_out = tor_strdup(name);
    if (d->usage_known)
      d->usage += length;
  }

  tor_free(name);
  return rv;
}
/**
 * Convenience wrapper around storage_dir_save_bytes_to_file() for a
 * NUL-terminated string <b>str</b>: the bytes up to (not including) the
 * terminating NUL are stored. All other arguments and the return value
 * are as for that function.
 */
int
storage_dir_save_string_to_file(storage_dir_t *d,
                                const char *str,
                                int binary,
                                char **fname_out)
{
  const size_t len = strlen(str);
  const uint8_t *bytes = (const uint8_t *) str;

  return storage_dir_save_bytes_to_file(d, bytes, len, binary, fname_out);
}
/**
 * Remove the file called <b>fname</b> from <b>d</b>.
 *
 * If the file cannot be unlinked, log a warning and leave the cached
 * listing and usage untouched. Otherwise drop <b>fname</b> from the cached
 * listing and, when the usage is cached, subtract the size the file had
 * just before removal.
 *
 * If that size is unavailable, or larger than the cached total (for
 * example because the file changed after the total was computed), the
 * cached usage is marked unknown so that it is recomputed on demand,
 * rather than letting the unsigned counter wrap around.
 */
void
storage_dir_remove_file(storage_dir_t *d,
                        const char *fname)
{
  char *path = NULL;
  tor_asprintf(&path, "%s/%s", d->directory, fname);
  const char *ipath = sandbox_intern_string(path);

  /* We only need the size if there is a cached total to adjust. */
  uint64_t size = 0;
  int size_known = 0;
  if (d->usage_known) {
    struct stat st;
    if (stat(ipath, &st) == 0) {
      size = st.st_size;
      size_known = 1;
    }
  }

  if (unlink(ipath) != 0) {
    log_warn(LD_FS, "Unable to unlink %s", escaped(path));
    tor_free(path);
    return;
  }

  if (d->usage_known) {
    if (size_known && size <= d->usage) {
      d->usage -= size;
    } else {
      /* The cached total can't be trusted any more. */
      d->usage_known = 0;
    }
  }
  if (d->contents) {
    smartlist_string_remove(d->contents, fname);
  }

  tor_free(path);
}
/** Helper type: one file in a storage directory, remembered together with
 * the modification time and size that we sort and account by. */
typedef struct shrinking_dir_entry_t {
  time_t mtime;
  uint64_t size;
  char *path;
} shrinking_dir_entry_t;

/** Helper: qsort() comparison function that orders shrinking_dir_entry_t
 * structs by ascending mtime. Returns -1, 0, or 1. */
static int
shrinking_dir_entry_compare(const void *a_, const void *b_)
{
  const shrinking_dir_entry_t *lhs = a_;
  const shrinking_dir_entry_t *rhs = b_;

  /* Each comparison is 0 or 1, so the difference is -1, 0, or 1. */
  return (lhs->mtime > rhs->mtime) - (lhs->mtime < rhs->mtime);
}
/**
 * Try to free space in <b>d</b> by deleting files, oldest modification
 * time first. Keep deleting until no more than <b>target_size</b> bytes
 * remain and at least <b>min_to_remove</b> files have been removed -- or
 * until every file has been tried.
 *
 * Nothing is touched when <b>min_to_remove</b> is 0 and the directory is
 * already within <b>target_size</b>. After deleting, the directory is
 * rescanned, so the cached usage is recomputed on the next request.
 *
 * Return -1 if the directory cannot be scanned, and 0 otherwise (even if
 * some files could not be unlinked).
 */
int
storage_dir_shrink(storage_dir_t *d,
                   uint64_t target_size,
                   int min_to_remove)
{
  /* Fast path: trust the cached usage if we have one. */
  if (min_to_remove == 0 && d->usage_known && d->usage <= target_size)
    return 0;

  if (storage_dir_rescan(d) < 0)
    return -1;

  /* This call also (re)fills d->usage, which the loop below maintains. */
  const uint64_t orig_usage = storage_dir_get_usage(d);
  if (min_to_remove == 0 && orig_usage <= target_size)
    return 0;

  /* Collect path, mtime and size for every listed file. Entries whose
   * stat() fails keep the zeroes that tor_calloc() gave them. */
  const int n_entries = smartlist_len(d->contents);
  shrinking_dir_entry_t *entries =
    tor_calloc(n_entries, sizeof(shrinking_dir_entry_t));
  SMARTLIST_FOREACH_BEGIN(d->contents, const char *, name) {
    shrinking_dir_entry_t *e = &entries[name_sl_idx];
    struct stat info;
    tor_asprintf(&e->path, "%s/%s", d->directory, name);
    if (stat(sandbox_intern_string(e->path), &info) == 0) {
      e->size = info.st_size;
      e->mtime = info.st_mtime;
    }
  } SMARTLIST_FOREACH_END(name);

  qsort(entries, n_entries, sizeof(shrinking_dir_entry_t),
        shrinking_dir_entry_compare);

  int i;
  for (i = 0; i < n_entries; ++i) {
    if (d->usage <= target_size && min_to_remove <= 0)
      break; /* Both goals met. */
    if (unlink(sandbox_intern_string(entries[i].path)) != 0)
      continue;
    if (! BUG(d->usage < entries[i].size)) {
      d->usage -= entries[i].size;
    }
    --min_to_remove;
  }

  for (i = 0; i < n_entries; ++i) {
    tor_free(entries[i].path);
  }
  tor_free(entries);

  storage_dir_rescan(d);
  return 0;
}
/** Remove all files in <b>d</b>, by shrinking it to a target size of zero
 * bytes while asking for max_files removals. Returns what
 * storage_dir_shrink() returns. */
int
storage_dir_remove_all(storage_dir_t *d)
{
  const int n_to_remove = d->max_files;

  return storage_dir_shrink(d, 0, n_to_remove);
}

37
src/common/storagedir.h Normal file
View File

@ -0,0 +1,37 @@
/* Copyright (c) 2017, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/* Public interface of the storagedir abstraction: a directory that holds
 * a bounded number of similar files. */
#ifndef TOR_STORAGEDIR_H
#define TOR_STORAGEDIR_H
/* Opaque handle: only this forward declaration appears in the header. */
typedef struct storage_dir_t storage_dir_t;
/* Forward declaration, so that the sandbox prototype below can name the
 * type without this header including the sandbox header itself. */
struct sandbox_cfg_elem;
/* Lifecycle: open (or create) a storage directory, and release the
 * in-memory handle again. */
storage_dir_t * storage_dir_new(const char *dirname, int n_files);
void storage_dir_free(storage_dir_t *d);
/* Sandbox setup: returns 0 on success, -1 if any rule could not be added. */
int storage_dir_register_with_sandbox(storage_dir_t *d,
struct sandbox_cfg_elem **cfg);
/* Queries: the list of filenames, and the total bytes used. */
const smartlist_t *storage_dir_list(storage_dir_t *d);
uint64_t storage_dir_get_usage(storage_dir_t *d);
/* Reading: access the file called fname, as a mapping or as a buffer. */
tor_mmap_t *storage_dir_map(storage_dir_t *d, const char *fname);
uint8_t *storage_dir_read(storage_dir_t *d, const char *fname, int bin,
size_t *sz_out);
/* Writing: store data under a new filename; on success (return value 0)
 * *fname_out, if provided, receives a newly allocated copy of that name. */
int storage_dir_save_bytes_to_file(storage_dir_t *d,
const uint8_t *data,
size_t length,
int binary,
char **fname_out);
int storage_dir_save_string_to_file(storage_dir_t *d,
const char *data,
int binary,
char **fname_out);
/* Deleting: a single named file, the oldest files until the size and
 * count goals are met, or everything. */
void storage_dir_remove_file(storage_dir_t *d,
const char *fname);
int storage_dir_shrink(storage_dir_t *d,
uint64_t target_size,
int min_to_remove);
int storage_dir_remove_all(storage_dir_t *d);
/* End of the TOR_STORAGEDIR_H include guard. */
#endif

View File

@ -129,6 +129,7 @@ src_test_test_SOURCES = \
src/test/test_shared_random.c \
src/test/test_socks.c \
src/test/test_status.c \
src/test/test_storagedir.c \
src/test/test_threads.c \
src/test/test_tortls.c \
src/test/test_util.c \

View File

@ -1232,6 +1232,7 @@ struct testgroup_t testgroups[] = {
{ "socks/", socks_tests },
{ "shared-random/", sr_tests },
{ "status/" , status_tests },
{ "storagedir/", storagedir_tests },
{ "tortls/", tortls_tests },
{ "util/", util_tests },
{ "util/format/", util_format_tests },

View File

@ -227,6 +227,7 @@ extern struct testcase_t routerkeys_tests[];
extern struct testcase_t routerlist_tests[];
extern struct testcase_t routerset_tests[];
extern struct testcase_t scheduler_tests[];
extern struct testcase_t storagedir_tests[];
extern struct testcase_t socks_tests[];
extern struct testcase_t status_tests[];
extern struct testcase_t thread_tests[];

270
src/test/test_storagedir.c Normal file
View File

@ -0,0 +1,270 @@
/* Copyright (c) 2017, The Tor Project, Inc. */
/* See LICENSE for licensing information */
#include "or.h"
#include "storagedir.h"
#include "test.h"
#ifdef HAVE_UTIME_H
#include <utime.h>
#endif
/** Test: a storage directory is created on demand, starts out empty with
 * zero usage, and looks the same when it is opened a second time. */
static void
test_storagedir_empty(void *arg)
{
  char *dirname = tor_strdup(get_fname_rnd("store_dir"));
  storage_dir_t *d = NULL;
  int pass;
  (void)arg;

  /* Nothing exists at that path before the first open. */
  tt_int_op(FN_NOENT, OP_EQ, file_status(dirname));

  /* First pass creates the directory; second pass reopens it. */
  for (pass = 0; pass < 2; ++pass) {
    d = storage_dir_new(dirname, 10);
    tt_assert(d);
    tt_int_op(FN_DIR, OP_EQ, file_status(dirname));
    tt_int_op(0, OP_EQ, smartlist_len(storage_dir_list(d)));
    tt_u64_op(0, OP_EQ, storage_dir_get_usage(d));
    storage_dir_free(d);
    d = NULL;
  }

 done:
  storage_dir_free(d);
  tor_free(dirname);
}
/** Test: files saved into a storage directory show up in its listing and
 * usage, survive freeing and reopening the handle, and can be read back
 * both into a buffer and through a memory mapping. */
static void
test_storagedir_basic(void *arg)
{
char *dirname = tor_strdup(get_fname_rnd("store_dir"));
storage_dir_t *d = NULL;
uint8_t *junk = NULL, *bytes = NULL;
const size_t junklen = 1024;
char *fname1 = NULL, *fname2 = NULL;
const char hello_str[] = "then what are we but cold, alone ... ?";
tor_mmap_t *mapping = NULL;
(void)arg;
/* Prepare a 1024-byte payload filled in by crypto_rand(). */
junk = tor_malloc(junklen);
crypto_rand((void*)junk, junklen);
d = storage_dir_new(dirname, 10);
tt_assert(d);
tt_u64_op(0, OP_EQ, storage_dir_get_usage(d));
int r;
/* Save a string; usage must grow by exactly its length. */
r = storage_dir_save_string_to_file(d, hello_str, 1, &fname1);
tt_int_op(r, OP_EQ, 0);
tt_ptr_op(fname1, OP_NE, NULL);
tt_u64_op(strlen(hello_str), OP_EQ, storage_dir_get_usage(d));
/* Save the payload; it must get a different filename. */
r = storage_dir_save_bytes_to_file(d, junk, junklen, 1, &fname2);
tt_int_op(r, OP_EQ, 0);
tt_ptr_op(fname2, OP_NE, NULL);
tt_str_op(fname1, OP_NE, fname2);
tt_int_op(2, OP_EQ, smartlist_len(storage_dir_list(d)));
tt_u64_op(junklen + strlen(hello_str), OP_EQ, storage_dir_get_usage(d));
tt_assert(smartlist_contains_string(storage_dir_list(d), fname1));
tt_assert(smartlist_contains_string(storage_dir_list(d), fname2));
/* Reopen the directory: listing and usage must now come from disk. */
storage_dir_free(d);
d = storage_dir_new(dirname, 10);
tt_assert(d);
tt_int_op(2, OP_EQ, smartlist_len(storage_dir_list(d)));
tt_u64_op(junklen + strlen(hello_str), OP_EQ, storage_dir_get_usage(d));
tt_assert(smartlist_contains_string(storage_dir_list(d), fname1));
tt_assert(smartlist_contains_string(storage_dir_list(d), fname2));
size_t n;
/* Read the payload back into a buffer and compare. */
bytes = storage_dir_read(d, fname2, 1, &n);
tt_assert(bytes);
tt_u64_op(n, OP_EQ, junklen);
tt_mem_op(bytes, OP_EQ, junk, junklen);
/* Map the string file and compare. */
mapping = storage_dir_map(d, fname1);
tt_assert(mapping);
tt_u64_op(mapping->size, OP_EQ, strlen(hello_str));
tt_mem_op(mapping->data, OP_EQ, hello_str, strlen(hello_str));
done:
tor_free(dirname);
tor_free(junk);
tor_free(bytes);
tor_munmap_file(mapping);
storage_dir_free(d);
tor_free(fname1);
tor_free(fname2);
}
/** Test: leftover ".tmp" files are deleted when the directory is listed,
 * removing a file that is already gone changes nothing, and removing an
 * existing file updates both the listing and the usage. */
static void
test_storagedir_deletion(void *arg)
{
(void)arg;
char *dirname = tor_strdup(get_fname_rnd("store_dir"));
storage_dir_t *d = NULL;
char *fn1 = NULL, *fn2 = NULL;
char *bytes = NULL;
int r;
const char str1[] = "There are nine and sixty ways to disguise communiques";
const char str2[] = "And rather more than one of them is right";
// Make sure the directory is there.
d = storage_dir_new(dirname, 10);
storage_dir_free(d);
d = NULL;
// Write one regular file and one temporary file directly into the
// directory, without going through the storagedir API.
tor_asprintf(&fn1, "%s/1007", dirname);
r = write_str_to_file(fn1, str1, 0);
tt_int_op(r, OP_EQ, 0);
tor_asprintf(&fn2, "%s/1003.tmp", dirname);
r = write_str_to_file(fn2, str2, 0);
tt_int_op(r, OP_EQ, 0);
// The tempfile should be deleted the next time we list the directory.
d = storage_dir_new(dirname, 10);
tt_int_op(1, OP_EQ, smartlist_len(storage_dir_list(d)));
tt_u64_op(strlen(str1), OP_EQ, storage_dir_get_usage(d));
tt_int_op(FN_FILE, OP_EQ, file_status(fn1));
tt_int_op(FN_NOENT, OP_EQ, file_status(fn2));
// The surviving file must still have its original contents.
bytes = (char*) storage_dir_read(d, "1007", 1, NULL);
tt_str_op(bytes, OP_EQ, str1);
// Should have no effect; file already gone.
storage_dir_remove_file(d, "1003.tmp");
tt_int_op(1, OP_EQ, smartlist_len(storage_dir_list(d)));
tt_u64_op(strlen(str1), OP_EQ, storage_dir_get_usage(d));
// Actually remove a file.
storage_dir_remove_file(d, "1007");
tt_int_op(FN_NOENT, OP_EQ, file_status(fn1));
tt_int_op(0, OP_EQ, smartlist_len(storage_dir_list(d)));
tt_u64_op(0, OP_EQ, storage_dir_get_usage(d));
done:
tor_free(dirname);
tor_free(fn1);
tor_free(fn2);
storage_dir_free(d);
tor_free(bytes);
}
/** Test: a storage directory opened with room for three files accepts
 * exactly three saves and rejects any further ones, leaving the usage at
 * three times the payload size. */
static void
test_storagedir_full(void *arg)
{
  (void)arg;

  char *dirname = tor_strdup(get_fname_rnd("store_dir"));
  storage_dir_t *d = NULL;
  const char str[] = "enemies of the peephole";
  int r, i;

  d = storage_dir_new(dirname, 3);
  tt_assert(d);

  /* The first three saves fit. */
  for (i = 0; i < 3; ++i) {
    r = storage_dir_save_string_to_file(d, str, 0, NULL);
    tt_int_op(r, OP_EQ, 0);
  }

  // These should fail!
  for (i = 0; i < 2; ++i) {
    r = storage_dir_save_string_to_file(d, str, 0, NULL);
    tt_int_op(r, OP_EQ, -1);
  }

  tt_u64_op(strlen(str) * 3, OP_EQ, storage_dir_get_usage(d));

 done:
  tor_free(dirname);
  storage_dir_free(d);
}
/** Test: storage_dir_shrink() leaves a directory alone when it is already
 * under the target, deletes the oldest files first when it has to free
 * space or remove a minimum number of files, and
 * storage_dir_remove_all() empties the directory. */
static void
test_storagedir_cleaning(void *arg)
{
  (void)arg;

  char *dirname = tor_strdup(get_fname_rnd("store_dir"));
  storage_dir_t *d = NULL;
  const char str[] =
    "On a mountain halfway between Reno and Rome / "
    "We have a machine in a plexiglass dome / "
    "Which listens and looks into everyone's home."
    " -- Dr. Seuss";
  char *fns[8];
  int r, i;

  memset(fns, 0, sizeof(fns));
  d = storage_dir_new(dirname, 10);
  tt_assert(d);

  /* Save eight files. File i holds str without its first 2*i bytes, so
   * each file is two bytes shorter than the one before it. */
  for (i = 0; i < 8; ++i) {
    r = storage_dir_save_string_to_file(d, str+i*2, 0, &fns[i]);
    tt_int_op(r, OP_EQ, 0);
  }

  /* Now we're going to make sure all the files have distinct mtimes:
   * fns[0] gets the oldest one, and each later file is 5 seconds newer. */
  time_t now = time(NULL);
  struct utimbuf ub;
  ub.actime = now;
  ub.modtime = now - 1000;
  for (i = 0; i < 8; ++i) {
    char *f = NULL;
    tor_asprintf(&f, "%s/%s", dirname, fns[i]);
    r = utime(f, &ub);
    tor_free(f);
    tt_int_op(r, OP_EQ, 0);
    ub.modtime += 5;
  }

  const uint64_t usage_orig = storage_dir_get_usage(d);
  /* No changes needed if we are already under target. */
  storage_dir_shrink(d, 1024*1024, 0);
  tt_u64_op(usage_orig, OP_EQ, storage_dir_get_usage(d));

  /* Get rid of at least one byte.  This will delete fns[0]. */
  storage_dir_shrink(d, usage_orig - 1, 0);
  tt_u64_op(usage_orig, OP_GT, storage_dir_get_usage(d));
  tt_u64_op(usage_orig - strlen(str), OP_EQ, storage_dir_get_usage(d));

  /* Get rid of at least two files.  This will delete fns[1] and fns[2],
   * which are 2 and 4 bytes shorter than str: hence the "+ 6". */
  storage_dir_shrink(d, 1024*1024, 2);
  tt_u64_op(usage_orig - strlen(str)*3 + 6, OP_EQ, storage_dir_get_usage(d));

  /* Get rid of everything. */
  storage_dir_remove_all(d);
  tt_u64_op(0, OP_EQ, storage_dir_get_usage(d));

 done:
  tor_free(dirname);
  storage_dir_free(d);
  for (i = 0; i < 8; ++i) {
    tor_free(fns[i]);
  }
}
/* Build the testcase_t initializer for test_storagedir_<name>: the test is
 * listed under the string "name" with the TT_FORK flag, and NULL for the
 * two remaining fields. */
#define ENT(name) \
{ #name, test_storagedir_ ## name, TT_FORK, NULL, NULL }
/* Every storagedir test case in this file, ended by END_OF_TESTCASES. */
struct testcase_t storagedir_tests[] = {
ENT(empty),
ENT(basic),
ENT(deletion),
ENT(full),
ENT(cleaning),
END_OF_TESTCASES
};