Use openssl's counter mode implementation when we have 1.0.0 or later

This shaves about 7% off our per-cell AES crypto time for me; the
effect for accelerated AES crypto should be even greater, since the AES
calculation itself will make up an even smaller portion of the
counter-mode running time.

(We don't want to do this for pre-1.0.0 OpenSSL, since our AES_CTR
implementation was actually faster than OpenSSL's there, by about
10%.)

Fixes issue #4526.
This commit is contained in:
Nick Mathewson 2011-11-20 21:43:14 -05:00
parent 8143074b3f
commit 9814019a54
2 changed files with 72 additions and 24 deletions

View File

@ -4,11 +4,13 @@
relatively few servers should still be on any version of OpenSSL
that doesn't have good optimized assembly AES.
o Major features:
o Major features (AES performance):
- Use OpenSSL's EVP interface for AES encryption, so that all
AES operations can use hardware acceleration (if present).
Resolves issue #4442.
- But only use the EVP interface when AES acceleration is enabled,
to avoid a performance regression. Resolves issue #4525.
- When using OpenSSL 1.0.0 or later, use OpenSSL's counter mode
implementation; it makes AES_CTR about 7% faster than our old one
(which was about 10% faster than the one OpenSSL used to provide).
Resolves issue #4526.

View File

@ -17,6 +17,11 @@
#include <openssl/aes.h>
#include <openssl/evp.h>
#include <openssl/engine.h>
#if OPENSSL_VERSION_NUMBER >= 0x10000000L
/* See comments about which counter mode implementation to use below. */
#include <openssl/modes.h>
#define USE_OPENSSL_CTR
#endif
#include "compat.h"
#include "aes.h"
#include "util.h"
@ -35,7 +40,13 @@
* faster than indirecting through the EVP layer.
*/
/* Include OpenSSL headers as needed. */
/* We have 2 strategies for counter mode: use our own, or use OpenSSL's.
*
* Here we have a counter mode that's faster than the one shipping with
* OpenSSL pre-1.0 (by about 10%!). But OpenSSL 1.0.0 added a counter mode
* implementation faster than the one here (by about 7%). So we pick which
* one to use based on the OpenSSL version above.
*/
/*======================================================================*/
/* Interface to AES code, and counter implementation */
@ -48,7 +59,7 @@ struct aes_cnt_cipher {
AES_KEY aes;
} key;
#if !defined(WORDS_BIGENDIAN)
#if !defined(WORDS_BIGENDIAN) && !defined(USE_OPENSSL_CTR)
#define USING_COUNTER_VARS
/** These four values, together, implement a 128-bit counter, with
* counter0 as the low-order word and counter3 as the high-order word. */
@ -70,7 +81,11 @@ struct aes_cnt_cipher {
/** The encrypted value of ctr_buf. */
uint8_t buf[16];
/** Our current stream position within buf. */
#ifdef USE_OPENSSL_CTR
unsigned int pos;
#else
uint8_t pos;
#endif
/** True iff we're using the evp implementation of this cipher. */
uint8_t using_evp;
@ -110,6 +125,7 @@ evaluate_evp_for_aes(int force_val)
return 0;
}
#ifndef USE_OPENSSL_CTR
#if !defined(USING_COUNTER_VARS)
#define COUNTER(c, n) ((c)->ctr_buf.buf32[3-(n)])
#else
@ -138,6 +154,7 @@ _aes_fill_buf(aes_cnt_cipher_t *cipher)
AES_encrypt(cipher->ctr_buf.buf, cipher->buf, &cipher->key.aes);
}
}
#endif
/**
* Return a newly allocated counter-mode AES128 cipher implementation.
@ -171,6 +188,7 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits)
AES_set_encrypt_key((const unsigned char *)key, key_bits, &cipher->key.aes);
cipher->using_evp = 0;
}
#ifdef USING_COUNTER_VARS
cipher->counter0 = 0;
cipher->counter1 = 0;
@ -181,7 +199,12 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits)
memset(cipher->ctr_buf.buf, 0, sizeof(cipher->ctr_buf.buf));
cipher->pos = 0;
#ifdef USE_OPENSSL_CTR
memset(cipher->buf, 0, sizeof(cipher->buf));
#else
_aes_fill_buf(cipher);
#endif
}
/** Release storage held by <b>cipher</b>
@ -206,6 +229,18 @@ aes_free_cipher(aes_cnt_cipher_t *cipher)
#define UPDATE_CTR_BUF(c, n)
#endif
#ifdef USE_OPENSSL_CTR
/** Per-block encryption callback handed to CRYPTO_ctr128_encrypt() when the
 * cipher is using the EVP implementation.
 *
 * <b>key</b> is treated as an EVP_CIPHER_CTX; the 16 bytes at <b>in</b> are
 * passed through EVP_EncryptUpdate() and the result is written to
 * <b>out</b>.  The return value of EVP_EncryptUpdate() is not checked: this
 * callback returns void and has no way to report a failure. */
static void
evp_block128_fn(const uint8_t in[16], uint8_t out[16], const void *key)
{
  const int block_len = 16;
  int written = 16;
  /* The callback signature delivers the context as a const void pointer;
   * EVP_EncryptUpdate() needs a mutable context, so drop the qualifier. */
  EVP_CIPHER_CTX *evp_ctx = (void *)key;

  EVP_EncryptUpdate(evp_ctx, out, &written, in, block_len);
}
#endif
/** Encrypt <b>len</b> bytes from <b>input</b>, storing the result in
* <b>output</b>. Uses the key in <b>cipher</b>, and advances the counter
* by <b>len</b> bytes as it encrypts.
@ -214,20 +249,29 @@ void
aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
char *output)
{
/* This function alone is up to 5% of our runtime in some profiles; anything
* we could do to make it faster would be great.
*
* Experimenting suggests that unrolling the inner loop into a switch
* statement doesn't help. What does seem to help is making the input and
* output buffers word aligned, and never crypting anything besides an
* integer number of words at a time -- it shaves maybe 4-5% of the per-byte
* encryption time measured by bench_aes. We can't do that with the current
* Tor protocol, though: Tor really likes to crypt things in 509-byte
* chunks.
*
* If we were really ambitious, we'd force len to be a multiple of the block
* size, and shave maybe another 4-5% off.
*/
#ifdef USE_OPENSSL_CTR
if (cipher->using_evp) {
/* In openssl 1.0.0, there's an if'd out EVP_aes_128_ctr in evp.h. If
* it weren't disabled, it might be better just to use that.
*/
CRYPTO_ctr128_encrypt((const unsigned char *)input,
(unsigned char *)output,
len,
&cipher->key.evp,
cipher->ctr_buf.buf,
cipher->buf,
&cipher->pos,
evp_block128_fn);
} else {
AES_ctr128_encrypt((const unsigned char *)input,
(unsigned char *)output,
len,
&cipher->key.aes,
cipher->ctr_buf.buf,
cipher->buf,
&cipher->pos);
}
#else
int c = cipher->pos;
if (PREDICT_UNLIKELY(!len)) return;
@ -250,6 +294,7 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
UPDATE_CTR_BUF(cipher, 0);
_aes_fill_buf(cipher);
}
#endif
}
/** Encrypt <b>len</b> bytes from <b>input</b>, storing the results in place.
@ -259,11 +304,9 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
void
aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
{
/* XXXX This function is up to 5% of our runtime in some profiles;
* we should look into unrolling some of the loops; taking advantage
* of alignment, using a bigger buffer, and so on. Not till after 0.1.2.x,
* though. */
#ifdef USE_OPENSSL_CTR
aes_crypt(cipher, data, len, data);
#else
int c = cipher->pos;
if (PREDICT_UNLIKELY(!len)) return;
@ -286,6 +329,7 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
UPDATE_CTR_BUF(cipher, 0);
_aes_fill_buf(cipher);
}
#endif
}
/** Reset the 128-bit counter of <b>cipher</b> to the 16-byte big-endian value
@ -302,6 +346,8 @@ aes_set_iv(aes_cnt_cipher_t *cipher, const char *iv)
cipher->pos = 0;
memcpy(cipher->ctr_buf.buf, iv, 16);
#ifndef USE_OPENSSL_CTR
_aes_fill_buf(cipher);
#endif
}