From 9814019a54a9a62e27b9e95eec5c9f0e5519fc57 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Sun, 20 Nov 2011 21:43:14 -0500 Subject: [PATCH] Use openssl's counter mode implementation when we have 1.0.0 or later This shaves about 7% off our per-cell AES crypto time for me; the effect for accelerated AES crypto should be even more, since the AES calculation itself will make an even smaller portion of the counter-mode performance. (We don't want to do this for pre-1.0.0 OpenSSL, since our AES_CTR implementation was actually faster than OpenSSL's there, by about 10%.) Fixes issue #4526. --- changes/aes_hackery | 8 +++-- src/common/aes.c | 88 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 72 insertions(+), 24 deletions(-) diff --git a/changes/aes_hackery b/changes/aes_hackery index 82aae238c8..b22cefe901 100644 --- a/changes/aes_hackery +++ b/changes/aes_hackery @@ -4,11 +4,13 @@ relatively few servers should still be on any version of OpenSSL that doesn't have good optimized assembly AES. - o Major features: + o Major features (AES performance): - Use OpenSSL's EVP interface for AES encryption, so that all AES operations can use hardware acceleration (if present). Resolves issue #4442. - But only use the EVP interface when AES acceleration is enabled, to avoid a performance regression. Resolves issue #4525. - - + - When using OpenSSL 1.0.0 or later, use OpenSSL's counter mode + implementation; it makes AES_CTR about 7% faster than our old one + (which was about 10% faster than the one OpenSSL used to provide). + Resolves issue #4526. diff --git a/src/common/aes.c b/src/common/aes.c index 53c80a35c3..cec6899817 100644 --- a/src/common/aes.c +++ b/src/common/aes.c @@ -17,6 +17,11 @@ #include #include #include +#if OPENSSL_VERSION_NUMBER >= 0x10000000L +/* See comments about which counter mode implementation to use below. */ +#include +#define USE_OPENSSL_CTR +#endif #include "compat.h" #include "aes.h" #include "util.h" @@ -35,7 +40,13 @@ * faster than indirecting through the EVP layer. */ -/* Include OpenSSL headers as needed. */ +/* We have 2 strategies for counter mode: use our own, or use OpenSSL's. + * + * Here we have a counter mode that's faster than the one shipping with + * OpenSSL pre-1.0 (by about 10%!). But OpenSSL 1.0.0 added a counter mode + * implementation faster than the one here (by about 7%). So we pick which + * one to used based on the Openssl version above. + */ /*======================================================================*/ /* Interface to AES code, and counter implementation */ @@ -48,7 +59,7 @@ struct aes_cnt_cipher { AES_KEY aes; } key; -#if !defined(WORDS_BIGENDIAN) +#if !defined(WORDS_BIGENDIAN) && !defined(USE_OPENSSL_CTR) #define USING_COUNTER_VARS /** These four values, together, implement a 128-bit counter, with * counter0 as the low-order word and counter3 as the high-order word. */ @@ -70,7 +81,11 @@ struct aes_cnt_cipher { /** The encrypted value of ctr_buf. */ uint8_t buf[16]; /** Our current stream position within buf. */ +#ifdef USE_OPENSSL_CTR + unsigned int pos; +#else uint8_t pos; +#endif /** True iff we're using the evp implementation of this cipher. */ uint8_t using_evp; @@ -110,6 +125,7 @@ evaluate_evp_for_aes(int force_val) return 0; } +#ifndef USE_OPENSSL_CTR #if !defined(USING_COUNTER_VARS) #define COUNTER(c, n) ((c)->ctr_buf.buf32[3-(n)]) #else @@ -138,6 +154,7 @@ _aes_fill_buf(aes_cnt_cipher_t *cipher) AES_encrypt(cipher->ctr_buf.buf, cipher->buf, &cipher->key.aes); } } +#endif /** * Return a newly allocated counter-mode AES128 cipher implementation. @@ -171,6 +188,7 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits) AES_set_encrypt_key((const unsigned char *)key, key_bits, &cipher->key.aes); cipher->using_evp = 0; } + #ifdef USING_COUNTER_VARS cipher->counter0 = 0; cipher->counter1 = 0; @@ -181,7 +199,12 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits) memset(cipher->ctr_buf.buf, 0, sizeof(cipher->ctr_buf.buf)); cipher->pos = 0; + +#ifdef USE_OPENSSL_CTR + memset(cipher->buf, 0, sizeof(cipher->buf)); +#else _aes_fill_buf(cipher); +#endif } /** Release storage held by cipher @@ -206,6 +229,18 @@ aes_free_cipher(aes_cnt_cipher_t *cipher) #define UPDATE_CTR_BUF(c, n) #endif +#ifdef USE_OPENSSL_CTR +/* Helper function to use EVP with openssl's counter-mode wrapper. */ +static void evp_block128_fn(const uint8_t in[16], + uint8_t out[16], + const void *key) +{ + EVP_CIPHER_CTX *ctx = (void*)key; + int inl=16, outl=16; + EVP_EncryptUpdate(ctx, out, &outl, in, inl); +} +#endif + /** Encrypt len bytes from input, storing the result in * output. Uses the key in cipher, and advances the counter * by len bytes as it encrypts. @@ -214,20 +249,29 @@ void aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len, char *output) { - /* This function alone is up to 5% of our runtime in some profiles; anything - * we could do to make it faster would be great. - * - * Experimenting suggests that unrolling the inner loop into a switch - * statement doesn't help. What does seem to help is making the input and - * output buffers word aligned, and never crypting anything besides an - * integer number of words at a time -- it shaves maybe 4-5% of the per-byte - * encryption time measured by bench_aes. We can't do that with the current - * Tor protocol, though: Tor really likes to crypt things in 509-byte - * chunks. - * - * If we were really ambitous, we'd force len to be a multiple of the block - * size, and shave maybe another 4-5% off. - */ +#ifdef USE_OPENSSL_CTR + if (cipher->using_evp) { + /* In openssl 1.0.0, there's an if'd out EVP_aes_128_ctr in evp.h. If + * it weren't disabled, it might be better just to use that. + */ + CRYPTO_ctr128_encrypt((const unsigned char *)input, + (unsigned char *)output, + len, + &cipher->key.evp, + cipher->ctr_buf.buf, + cipher->buf, + &cipher->pos, + evp_block128_fn); + } else { + AES_ctr128_encrypt((const unsigned char *)input, + (unsigned char *)output, + len, + &cipher->key.aes, + cipher->ctr_buf.buf, + cipher->buf, + &cipher->pos); + } +#else int c = cipher->pos; if (PREDICT_UNLIKELY(!len)) return; @@ -250,6 +294,7 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len, UPDATE_CTR_BUF(cipher, 0); _aes_fill_buf(cipher); } +#endif } /** Encrypt len bytes from input, storing the results in place. @@ -259,11 +304,9 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len, void aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len) { - - /* XXXX This function is up to 5% of our runtime in some profiles; - * we should look into unrolling some of the loops; taking advantage - * of alignment, using a bigger buffer, and so on. Not till after 0.1.2.x, - * though. */ +#ifdef USE_OPENSSL_CTR + aes_crypt(cipher, data, len, data); +#else int c = cipher->pos; if (PREDICT_UNLIKELY(!len)) return; @@ -286,6 +329,7 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len) UPDATE_CTR_BUF(cipher, 0); _aes_fill_buf(cipher); } +#endif } /** Reset the 128-bit counter of cipher to the 16-bit big-endian value @@ -302,6 +346,8 @@ aes_set_iv(aes_cnt_cipher_t *cipher, const char *iv) cipher->pos = 0; memcpy(cipher->ctr_buf.buf, iv, 16); +#ifndef USE_OPENSSL_CTR _aes_fill_buf(cipher); +#endif }