From cf7342ab6f653c2dc49134024d668b06bac2c96c Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 14 Dec 2018 13:45:22 -0500 Subject: [PATCH 1/6] Add a benchmark for parsing a microdescriptor --- src/test/bench.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/test/bench.c b/src/test/bench.c index ff8707d41c..f8680c3ab6 100644 --- a/src/test/bench.c +++ b/src/test/bench.c @@ -39,6 +39,9 @@ #include "lib/crypt_ops/digestset.h" #include "lib/crypt_ops/crypto_init.h" +#include "feature/dirparse/microdesc_parse.h" +#include "feature/nodelist/microdesc.h" + #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_PROCESS_CPUTIME_ID) static uint64_t nanostart; static inline uint64_t @@ -639,6 +642,41 @@ bench_ecdh_p224(void) } #endif +static void +bench_md_parse(void) +{ + uint64_t start, end; + const int N = 100000; + // selected arbitrarily + const char md_text[] = + "@last-listed 2018-12-14 18:14:14\n" + "onion-key\n" + "-----BEGIN RSA PUBLIC KEY-----\n" + "MIGJAoGBAMHkZeXNDX/49JqM2BVLmh1Fnb5iMVnatvZZTLJyedqDLkbXZ1WKP5oh\n" + "7ec14dj/k3ntpwHD4s2o3Lb6nfagWbug4+F/rNJ7JuFru/PSyOvDyHGNAuegOXph\n" + "3gTGjdDpv/yPoiadGebbVe8E7n6hO+XxM2W/4dqheKimF0/s9B7HAgMBAAE=\n" + "-----END RSA PUBLIC KEY-----\n" + "ntor-onion-key QgF/EjqlNG1wRHLIop/nCekEH+ETGZSgYOhu26eiTF4=\n" + "family $00E9A86E7733240E60D8435A7BBD634A23894098 " + "$329BD7545DEEEBBDC8C4285F243916F248972102 " + "$69E06EBB2573A4F89330BDF8BC869794A3E10E4D " + "$DCA2A3FAE50B3729DAA15BC95FB21AF03389818B\n" + "p accept 53,80,443,5222-5223,25565\n" + "id ed25519 BzffzY99z6Q8KltcFlUTLWjNTBU7yKK+uQhyi1Ivb3A\n"; + + reset_perftime(); + start = perftime(); + for (int i = 0; i < N; ++i) { + smartlist_t *s = microdescs_parse_from_string(md_text, NULL, 1, + SAVED_IN_CACHE, NULL); + SMARTLIST_FOREACH(s, microdesc_t *, md, microdesc_free(md)); + smartlist_free(s); + } + + end = perftime(); + printf("Microdesc parse: %f nsec\n", NANOCOUNT(start, end, N)); +} + typedef void (*bench_fn)(void); typedef struct benchmark_t { @@ -666,6 +704,8 @@ static struct benchmark_t benchmarks[] = { ENT(ecdh_p256), ENT(ecdh_p224), #endif + + ENT(md_parse), {NULL,NULL,0} }; From 3c35c0d441cc25f750524056113970a376d8432c Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 14 Dec 2018 13:07:55 -0500 Subject: [PATCH 2/6] Add a function to provide an upper bound on base64 decoded length --- src/lib/encoding/binascii.c | 12 ++++++++++++ src/lib/encoding/binascii.h | 1 + src/test/test_util_format.c | 4 +++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/lib/encoding/binascii.c b/src/lib/encoding/binascii.c index e9140d9432..067db075ad 100644 --- a/src/lib/encoding/binascii.c +++ b/src/lib/encoding/binascii.c @@ -179,6 +179,18 @@ base64_encode_size(size_t srclen, int flags) return enclen; } +/** Return an upper bound on the number of bytes that might be needed to hold + * the data from decoding the base64 string srclen. This is only an + * upper bound, since some part of the base64 string might be padding or + * space. */ +size_t +base64_decode_maxsize(size_t srclen) +{ + tor_assert(srclen < INT_MAX / 3); + + return CEIL_DIV(srclen * 3, 4); +} + /** Internal table mapping 6 bit values to the Base64 alphabet. */ static const char base64_encode_table[64] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', diff --git a/src/lib/encoding/binascii.h b/src/lib/encoding/binascii.h index 23cbaa7070..c71ba65dfb 100644 --- a/src/lib/encoding/binascii.h +++ b/src/lib/encoding/binascii.h @@ -42,6 +42,7 @@ const char *hex_str(const char *from, size_t fromlen); #define BASE64_ENCODE_MULTILINE 1 size_t base64_encode_size(size_t srclen, int flags); +size_t base64_decode_maxsize(size_t srclen); int base64_encode(char *dest, size_t destlen, const char *src, size_t srclen, int flags); int base64_decode(char *dest, size_t destlen, const char *src, size_t srclen); diff --git a/src/test/test_util_format.c b/src/test/test_util_format.c index 85d8a8e62e..fd57125b86 100644 --- a/src/test/test_util_format.c +++ b/src/test/test_util_format.c @@ -392,10 +392,13 @@ test_util_format_encoded_size(void *arg) base64_encode(outbuf, sizeof(outbuf), (char *)inbuf, i, 0); tt_int_op(strlen(outbuf), OP_EQ, base64_encode_size(i, 0)); + tt_int_op(i, OP_LE, base64_decode_maxsize(strlen(outbuf))); + base64_encode(outbuf, sizeof(outbuf), (char *)inbuf, i, BASE64_ENCODE_MULTILINE); tt_int_op(strlen(outbuf), OP_EQ, base64_encode_size(i, BASE64_ENCODE_MULTILINE)); + tt_int_op(i, OP_LE, base64_decode_maxsize(strlen(outbuf))); } done: @@ -417,4 +420,3 @@ struct testcase_t util_format_tests[] = { { "encoded_size", test_util_format_encoded_size, 0, NULL, NULL }, END_OF_TESTCASES }; - From 6dc90d290daa29b4ff2c7692be3a2ed64f25dfc1 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 14 Dec 2018 13:11:58 -0500 Subject: [PATCH 3/6] Use 25% less RAM for base64-encoded directory objects We were allocating N bytes to decode an N-byte base64 encoding, when 3N/4 would have been enough. --- src/feature/dirparse/parsecommon.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/feature/dirparse/parsecommon.c b/src/feature/dirparse/parsecommon.c index e00af0eea2..91b775533b 100644 --- a/src/feature/dirparse/parsecommon.c +++ b/src/feature/dirparse/parsecommon.c @@ -393,8 +393,9 @@ get_next_token(memarea_t *area, RET_ERR("Couldn't parse private key."); } else { /* If it's something else, try to base64-decode it */ int r; - tok->object_body = ALLOC(next-*s); /* really, this is too much RAM. */ - r = base64_decode(tok->object_body, next-*s, *s, next-*s); + size_t maxsize = base64_decode_maxsize(next-*s); + tok->object_body = ALLOC(maxsize); + r = base64_decode(tok->object_body, maxsize, *s, next-*s); if (r<0) RET_ERR("Malformed object: bad base64-encoded data"); tok->object_size = r; From 0556942284d7dcdf0a5e7a31e94b925378a338a8 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 14 Dec 2018 13:17:17 -0500 Subject: [PATCH 4/6] Use a single path for all PEM-like objects in get_next_token() Previously, we would decode the PEM wrapper for keys twice: once in get_next_token, and once later in PEM decode. Now we just do all of the wrapper and base64 stuff in get_next_token, and store the base64-decoded part in the token object for keys and non-keys alike. This change should speed up parsing slightly by letting us skip a bunch of stuff in crypto_pk_read_*from_string(), including the tag detection parts of pem_decode(), and an extra key allocation and deallocation pair. Retaining the base64-decoded part in the token object will allow us to speed up our microdesc parsing, since it is the asn1 portion that we actually want to retain. --- src/feature/dirparse/parsecommon.c | 24 +++++++++++++----------- src/test/test_parsecommon.c | 8 ++++---- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/feature/dirparse/parsecommon.c b/src/feature/dirparse/parsecommon.c index 91b775533b..2e7cea8169 100644 --- a/src/feature/dirparse/parsecommon.c +++ b/src/feature/dirparse/parsecommon.c @@ -266,7 +266,7 @@ get_next_token(memarea_t *area, * attack, a bug, or some other nonsense. */ #define MAX_LINE_LENGTH (128*1024) - const char *next, *eol, *obstart; + const char *next, *eol; size_t obname_len; int i; directory_token_t *tok; @@ -352,7 +352,6 @@ get_next_token(memarea_t *area, if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */ goto check_object; - obstart = *s; /* Set obstart to start of object spec */ if (eol - *s <= 16 || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */ strcmp_len(eol-5, "-----", 5) || /* nuls or invalid endings */ (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) { /* name too long */ @@ -383,15 +382,7 @@ get_next_token(memarea_t *area, if (next - *s > MAX_UNPARSED_OBJECT_SIZE) RET_ERR("Couldn't parse object: missing footer or object much too big."); - if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */ - tok->key = crypto_pk_new(); - if (crypto_pk_read_public_key_from_string(tok->key, obstart, eol-obstart)) - RET_ERR("Couldn't parse public key."); - } else if (!strcmp(tok->object_type, "RSA PRIVATE KEY")) { /* private key */ - tok->key = crypto_pk_new(); - if (crypto_pk_read_private_key_from_string(tok->key, obstart, eol-obstart)) - RET_ERR("Couldn't parse private key."); - } else { /* If it's something else, try to base64-decode it */ + { int r; size_t maxsize = base64_decode_maxsize(next-*s); tok->object_body = ALLOC(maxsize); @@ -400,6 +391,17 @@ get_next_token(memarea_t *area, RET_ERR("Malformed object: bad base64-encoded data"); tok->object_size = r; } + + if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */ + tok->key = crypto_pk_asn1_decode(tok->object_body, tok->object_size); + if (! tok->key) + RET_ERR("Couldn't parse public key."); + } else if (!strcmp(tok->object_type, "RSA PRIVATE KEY")) { /* private key */ + tok->key = crypto_pk_asn1_decode_private(tok->object_body, + tok->object_size); + if (! tok->key) + RET_ERR("Couldn't parse private key."); + } *s = eol; check_object: diff --git a/src/test/test_parsecommon.c b/src/test/test_parsecommon.c index 6da125dd0a..8e74fcdb4d 100644 --- a/src/test/test_parsecommon.c +++ b/src/test/test_parsecommon.c @@ -300,8 +300,8 @@ test_parsecommon_get_next_token_parse_keys(void *arg) tt_int_op(token->tp, OP_EQ, R_IPO_ONION_KEY); tt_int_op(token->n_args, OP_EQ, 0); tt_str_op(token->object_type, OP_EQ, "RSA PUBLIC KEY"); - tt_int_op(token->object_size, OP_EQ, 0); - tt_assert(!token->object_body); + tt_int_op(token->object_size, OP_EQ, 140); + tt_assert(token->object_body); tt_assert(token->key); tt_assert(!token->error); @@ -335,8 +335,8 @@ test_parsecommon_get_next_token_parse_keys(void *arg) tt_int_op(token2->tp, OP_EQ, C_CLIENT_KEY); tt_int_op(token2->n_args, OP_EQ, 0); tt_str_op(token2->object_type, OP_EQ, "RSA PRIVATE KEY"); - tt_int_op(token2->object_size, OP_EQ, 0); - tt_assert(!token2->object_body); + tt_int_op(token2->object_size, OP_EQ, 608); + tt_assert(token2->object_body); tt_assert(token2->key); tt_assert(!token->error); From 7113a339dc7c680b9e48565bfaf97a10185a5244 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 14 Dec 2018 13:28:07 -0500 Subject: [PATCH 5/6] Avoid a needless decode/re-encode step in assigning onion keys Previously we had decoded the asn.1 to get a public key, and then discarded the asn.1 so that we had to re-encode the key to store it in the onion_pkey field of a microdesc_t or routerinfo_t. Now we can just do a tor_memdup() instead, which should be loads faster. --- src/feature/dirparse/microdesc_parse.c | 4 ++-- src/feature/dirparse/routerparse.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/feature/dirparse/microdesc_parse.c b/src/feature/dirparse/microdesc_parse.c index 8ad9626377..165945e392 100644 --- a/src/feature/dirparse/microdesc_parse.c +++ b/src/feature/dirparse/microdesc_parse.c @@ -177,8 +177,8 @@ microdescs_parse_from_string(const char *s, const char *eos, "Relay's onion key had invalid exponent."); goto next; } - router_set_rsa_onion_pkey(tok->key, &md->onion_pkey, - &md->onion_pkey_len); + md->onion_pkey = tor_memdup(tok->object_body, tok->object_size); + md->onion_pkey_len = tok->object_size; crypto_pk_free(tok->key); if ((tok = find_opt_by_keyword(tokens, K_ONION_KEY_NTOR))) { diff --git a/src/feature/dirparse/routerparse.c b/src/feature/dirparse/routerparse.c index 2249ab7cbc..358f6e44e8 100644 --- a/src/feature/dirparse/routerparse.c +++ b/src/feature/dirparse/routerparse.c @@ -588,8 +588,8 @@ router_parse_entry_from_string(const char *s, const char *end, "Relay's onion key had invalid exponent."); goto err; } - router_set_rsa_onion_pkey(tok->key, &router->onion_pkey, - &router->onion_pkey_len); + router->onion_pkey = tor_memdup(tok->object_body, tok->object_size); + router->onion_pkey_len = tok->object_size; crypto_pk_free(tok->key); if ((tok = find_opt_by_keyword(tokens, K_ONION_KEY_NTOR))) { From 976c62e62a38c9f30c32ca742a43d59633a0e6ab Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 14 Dec 2018 13:34:32 -0500 Subject: [PATCH 6/6] Changes file for ticket28839 --- changes/ticket28839 | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changes/ticket28839 diff --git a/changes/ticket28839 b/changes/ticket28839 new file mode 100644 index 0000000000..e9f81dc405 --- /dev/null +++ b/changes/ticket28839 @@ -0,0 +1,3 @@ + o Minor features (performance): + - Speed up microdesriptor parsing by about 30%, to help + improve startup time. Closes ticket 28839.