From f079c27761a676f7c4200f7275112edd0b5e1270 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Mon, 6 Jul 2015 09:57:23 +0000 Subject: [PATCH] Integrate the accelerated Curve25519 scalar basemult. Integration work scavanged from nickm's `ticket8897_9663_v2` branch, with minor modifications. Tor will still sanity check the output but now also attempts to catch extreme breakage by spot checking the optimized implementation vs known values from the NaCl documentation. Implements feature 9663. --- changes/feature9663 | 5 ++ src/common/crypto.c | 3 + src/common/crypto_curve25519.c | 119 ++++++++++++++++++++++++++++++++- src/common/crypto_curve25519.h | 5 ++ src/test/bench.c | 15 ++++- src/test/test_crypto.c | 25 +++++++ 6 files changed, 168 insertions(+), 4 deletions(-) create mode 100644 changes/feature9663 diff --git a/changes/feature9663 b/changes/feature9663 new file mode 100644 index 0000000000..c02e08d034 --- /dev/null +++ b/changes/feature9663 @@ -0,0 +1,5 @@ + o Minor feature (performance): + - Improve the runtime speed of the ntor handshake by using an + optimized curve25519 basepoint scalarmult implementation from the + public-domain ed25519-donna by Andrew M. ("floodyberry"), based on + ideas by Adam Langley. Implements ticket 9663. diff --git a/src/common/crypto.c b/src/common/crypto.c index 934679e67a..c9745dd0e6 100644 --- a/src/common/crypto.c +++ b/src/common/crypto.c @@ -25,6 +25,7 @@ #define CRYPTO_PRIVATE #include "crypto.h" +#include "crypto_curve25519.h" #if OPENSSL_VERSION_NUMBER < OPENSSL_V_SERIES(1,0,0) #error "We require OpenSSL >= 1.0.0" @@ -305,6 +306,8 @@ crypto_early_init(void) return -1; if (crypto_init_siphash_key() < 0) return -1; + + curve25519_init(); } return 0; } diff --git a/src/common/crypto_curve25519.c b/src/common/crypto_curve25519.c index 5bb14b0d95..80b0d88952 100644 --- a/src/common/crypto_curve25519.c +++ b/src/common/crypto_curve25519.c @@ -14,6 +14,8 @@ #include "util.h" #include "torlog.h" +#include "ed25519/donna/ed25519_donna_tor.h" + /* ============================== Part 1: wrap a suitable curve25519 implementation as curve25519_impl ============================== */ @@ -30,6 +32,10 @@ int curve25519_donna(uint8_t *mypublic, #endif #endif +static void pick_curve25519_basepoint_impl(void); + +static int curve25519_use_ed = -1; + STATIC int curve25519_impl(uint8_t *output, const uint8_t *secret, const uint8_t *basepoint) @@ -50,6 +56,34 @@ curve25519_impl(uint8_t *output, const uint8_t *secret, return r; } +STATIC int +curve25519_basepoint_impl(uint8_t *output, const uint8_t *secret) +{ + int r = 0; + if (PREDICT_UNLIKELY(curve25519_use_ed == -1)) { + pick_curve25519_basepoint_impl(); + } + + /* TODO: Someone should benchmark curved25519_scalarmult_basepoint versus + * an optimized NaCl build to see which should be used when compiled with + * NaCl available. I suspected that the ed25519 optimization always wins. + */ + if (PREDICT_LIKELY(curve25519_use_ed == 1)) { + curved25519_scalarmult_basepoint_donna(output, secret); + r = 0; + } else { + static const uint8_t basepoint[32] = {9}; + r = curve25519_impl(output, secret, basepoint); + } + return r; +} + +void +curve25519_set_impl_params(int use_ed) +{ + curve25519_use_ed = use_ed; +} + /* ============================== Part 2: Wrap curve25519_impl with some convenience types and functions. ============================== */ @@ -113,9 +147,7 @@ void curve25519_public_key_generate(curve25519_public_key_t *key_out, const curve25519_secret_key_t *seckey) { - static const uint8_t basepoint[32] = {9}; - - curve25519_impl(key_out->public_key, seckey->secret_key, basepoint); + curve25519_basepoint_impl(key_out->public_key, seckey->secret_key); } int @@ -283,3 +315,84 @@ curve25519_handshake(uint8_t *output, curve25519_impl(output, skey->secret_key, pkey->public_key); } +/** Check whether the ed25519-based curve25519 basepoint optimization seems to + * be working. If so, return 0; otherwise return -1. */ +static int +curve25519_basepoint_spot_check(void) +{ + static const uint8_t alicesk[32] = { + 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d, + 0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45, + 0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a, + 0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a + }; + static const uint8_t alicepk[32] = { + 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54, + 0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a, + 0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4, + 0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a + }; + const int loop_max=200; + int save_use_ed = curve25519_use_ed; + unsigned char e1[32] = { 5 }; + unsigned char e2[32] = { 5 }; + unsigned char x[32],y[32]; + int i; + int r=0; + + /* Check the most basic possible sanity via the test secret/public key pair + * used in "Cryptography in NaCl - 2. Secret keys and public keys". This + * may catch catastrophic failures on systems where Curve25519 is expensive, + * without requiring a ton of key generation. + */ + curve25519_use_ed = 1; + r |= curve25519_basepoint_impl(x, alicesk); + if (fast_memneq(x, alicepk, 32)) + goto fail; + + /* Ok, the optimization appears to produce passable results, try a few more + * values, maybe there's something subtle wrong. + */ + for (i = 0; i < loop_max; ++i) { + curve25519_use_ed = 0; + r |= curve25519_basepoint_impl(x, e1); + curve25519_use_ed = 1; + r |= curve25519_basepoint_impl(y, e2); + if (fast_memneq(x,y,32)) + goto fail; + memcpy(e1, x, 32); + memcpy(e2, x, 32); + } + + goto end; + fail: + r = -1; + end: + curve25519_use_ed = save_use_ed; + return r; +} + +/** Choose whether to use the ed25519-based curve25519-basepoint + * implementation. */ +static void +pick_curve25519_basepoint_impl(void) +{ + curve25519_use_ed = 1; + + if (curve25519_basepoint_spot_check() == 0) + return; + + log_warn(LD_CRYPTO, "The ed25519-based curve25519 basepoint " + "multiplication seems broken; using the curve25519 " + "implementation."); + curve25519_use_ed = 0; +} + +/** Initialize the curve25519 implementations. This is necessary if you're + * going to use them in a multithreaded setting, and not otherwise. */ +void +curve25519_init(void) +{ + pick_curve25519_basepoint_impl(); +} + diff --git a/src/common/crypto_curve25519.h b/src/common/crypto_curve25519.h index 48e8a6d962..d53743986c 100644 --- a/src/common/crypto_curve25519.h +++ b/src/common/crypto_curve25519.h @@ -61,6 +61,8 @@ int curve25519_rand_seckey_bytes(uint8_t *out, int extra_strong); #ifdef CRYPTO_CURVE25519_PRIVATE STATIC int curve25519_impl(uint8_t *output, const uint8_t *secret, const uint8_t *basepoint); + +STATIC int curve25519_basepoint_impl(uint8_t *output, const uint8_t *secret); #endif #define CURVE25519_BASE64_PADDED_LEN 44 @@ -82,5 +84,8 @@ ssize_t crypto_read_tagged_contents_from_file(const char *fname, uint8_t *data_out, ssize_t data_out_len); +void curve25519_set_impl_params(int use_ed); +void curve25519_init(void); + #endif diff --git a/src/test/bench.c b/src/test/bench.c index bc2b1f04d8..dbff7d0262 100644 --- a/src/test/bench.c +++ b/src/test/bench.c @@ -177,7 +177,7 @@ bench_onion_TAP(void) } static void -bench_onion_ntor(void) +bench_onion_ntor_impl(void) { const int iters = 1<<10; int i; @@ -234,6 +234,19 @@ bench_onion_ntor(void) dimap_free(keymap, NULL); } +static void +bench_onion_ntor(void) +{ + int ed; + + for (ed = 0; ed <= 1; ++ed) { + printf("Ed25519-based basepoint multiply = %s.\n", + (ed == 0) ? "disabled" : "enabled"); + curve25519_set_impl_params(ed); + bench_onion_ntor_impl(); + } +} + static void bench_ed25519(void) { diff --git a/src/test/test_crypto.c b/src/test/test_crypto.c index 6cba850f30..bc88248db0 100644 --- a/src/test/test_crypto.c +++ b/src/test/test_crypto.c @@ -1124,6 +1124,29 @@ test_crypto_curve25519_impl(void *arg) tor_free(mem_op_hex_tmp); } +static void +test_crypto_curve25519_basepoint(void *arg) +{ + uint8_t secret[32]; + uint8_t public1[32]; + uint8_t public2[32]; + const int iters = 2048; + int i; + (void) arg; + + for (i = 0; i < iters; ++i) { + crypto_rand((char*)secret, 32); + curve25519_set_impl_params(1); /* Use optimization */ + curve25519_basepoint_impl(public1, secret); + curve25519_set_impl_params(0); /* Disable optimization */ + curve25519_basepoint_impl(public2, secret); + tt_mem_op(public1, OP_EQ, public2, 32); + } + + done: + ; +} + static void test_crypto_curve25519_wrappers(void *arg) { @@ -1733,6 +1756,8 @@ struct testcase_t crypto_tests[] = { { "hkdf_sha256", test_crypto_hkdf_sha256, 0, NULL, NULL }, { "curve25519_impl", test_crypto_curve25519_impl, 0, NULL, NULL }, { "curve25519_impl_hibit", test_crypto_curve25519_impl, 0, NULL, (void*)"y"}, + { "curve25519_basepoint", + test_crypto_curve25519_basepoint, TT_FORK, NULL, NULL }, { "curve25519_wrappers", test_crypto_curve25519_wrappers, 0, NULL, NULL }, { "curve25519_encode", test_crypto_curve25519_encode, 0, NULL, NULL }, { "curve25519_persist", test_crypto_curve25519_persist, 0, NULL, NULL },