mirror of
https://github.com/veracrypt/VeraCrypt
synced 2024-11-24 20:13:33 +01:00
Windows crypto: optimize XTS implementation for 64-bit builds using SSE2 thanks to simplification of storage of whitening values in memory (normal order instead of reverse order).
This commit is contained in:
parent
f3a98fda03
commit
d8d92357b0
170
src/Common/Xts.c
170
src/Common/Xts.c
@ -27,6 +27,8 @@ For big-endian platforms define BYTE_ORDER as BIG_ENDIAN. */
|
|||||||
# include <memory.h>
|
# include <memory.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "cpu.h"
|
||||||
|
#include "misc.h"
|
||||||
#include "Xts.h"
|
#include "Xts.h"
|
||||||
|
|
||||||
|
|
||||||
@ -56,6 +58,35 @@ void EncryptBufferXTS (unsigned __int8 *buffer,
|
|||||||
EncryptBufferXTSNonParallel (buffer, length, startDataUnitNo, startCipherBlockNo, ks, ks2, cipher);
|
EncryptBufferXTSNonParallel (buffer, length, startDataUnitNo, startCipherBlockNo, ks, ks2, cipher);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if (CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_X64)
|
||||||
|
|
||||||
|
/*
 * XorBlocks: XOR a run of 128-bit blocks from `ptr` into `result`, in place,
 * using unaligned SSE2 loads/stores (no alignment requirement on either
 * pointer).
 *
 *   result  destination pointer; each 128-bit block it addresses is replaced
 *           by (block XOR corresponding block at ptr)
 *   ptr     source pointer (e.g. the whitening values)
 *   len     number of 128-bit blocks to process
 *   start,
 *   end     block range of the current data unit; used only to restore `len`
 *
 * The pointer steps (+2 per block, +4 per pair of blocks) assume `result` and
 * `ptr` point to 64-bit integers, as they do at the call sites in this file.
 *
 * Side effects the caller must account for:
 *   - `result` and `ptr` are advanced past the processed blocks;
 *   - `len` is consumed by the loop and then reset to (end - start).
 * `result`, `ptr` and `len` must therefore be modifiable lvalues, and every
 * argument is evaluated more than once, so pass plain variables only.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' behaves as a single statement (safe inside unbraced if/else).
 */
#define XorBlocks(result,ptr,len,start,end) \
	do \
	{ \
		while ((len) >= 2) \
		{ \
			__m128i xmm1 = _mm_loadu_si128((const __m128i*)(ptr)); \
			__m128i xmm2 = _mm_loadu_si128((const __m128i*)(result)); \
			__m128i xmm3 = _mm_loadu_si128((const __m128i*)((ptr) + 2)); \
			__m128i xmm4 = _mm_loadu_si128((const __m128i*)((result) + 2)); \
			\
			_mm_storeu_si128((__m128i*)(result), _mm_xor_si128(xmm1, xmm2)); \
			_mm_storeu_si128((__m128i*)((result) + 2), _mm_xor_si128(xmm3, xmm4)); \
			(ptr) += 4; \
			(result) += 4; \
			(len) -= 2; \
		} \
		\
		if (len) \
		{ \
			__m128i xmm1 = _mm_loadu_si128((const __m128i*)(ptr)); \
			__m128i xmm2 = _mm_loadu_si128((const __m128i*)(result)); \
			\
			_mm_storeu_si128((__m128i*)(result), _mm_xor_si128(xmm1, xmm2)); \
			(ptr) += 2; \
			(result) += 2; \
		} \
		(len) = (end) - (start); \
	} while (0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
// Optimized for encryption algorithms supporting intra-data-unit parallelization
|
// Optimized for encryption algorithms supporting intra-data-unit parallelization
|
||||||
static void EncryptBufferXTSParallel (unsigned __int8 *buffer,
|
static void EncryptBufferXTSParallel (unsigned __int8 *buffer,
|
||||||
@ -74,9 +105,8 @@ static void EncryptBufferXTSParallel (unsigned __int8 *buffer,
|
|||||||
unsigned __int64 *whiteningValuePtr64 = (unsigned __int64 *) whiteningValue;
|
unsigned __int64 *whiteningValuePtr64 = (unsigned __int64 *) whiteningValue;
|
||||||
unsigned __int64 *bufPtr = (unsigned __int64 *) buffer;
|
unsigned __int64 *bufPtr = (unsigned __int64 *) buffer;
|
||||||
unsigned __int64 *dataUnitBufPtr;
|
unsigned __int64 *dataUnitBufPtr;
|
||||||
unsigned int startBlock = startCipherBlockNo, endBlock, block;
|
unsigned int startBlock = startCipherBlockNo, endBlock, block, countBlock;
|
||||||
unsigned __int64 *const finalInt64WhiteningValuesPtr = whiteningValuesPtr64 + sizeof (whiteningValues) / sizeof (*whiteningValuesPtr64) - 1;
|
TC_LARGEST_COMPILER_UINT remainingBlocks, dataUnitNo;
|
||||||
TC_LARGEST_COMPILER_UINT blockCount, dataUnitNo;
|
|
||||||
|
|
||||||
/* The encrypted data unit number (i.e. the resultant ciphertext block) is to be multiplied in the
|
/* The encrypted data unit number (i.e. the resultant ciphertext block) is to be multiplied in the
|
||||||
finite field GF(2^128) by j-th power of n, where j is the sequential plaintext/ciphertext block
|
finite field GF(2^128) by j-th power of n, where j is the sequential plaintext/ciphertext block
|
||||||
@ -94,17 +124,18 @@ static void EncryptBufferXTSParallel (unsigned __int8 *buffer,
|
|||||||
if (length % BYTES_PER_XTS_BLOCK)
|
if (length % BYTES_PER_XTS_BLOCK)
|
||||||
TC_THROW_FATAL_EXCEPTION;
|
TC_THROW_FATAL_EXCEPTION;
|
||||||
|
|
||||||
blockCount = length / BYTES_PER_XTS_BLOCK;
|
remainingBlocks = length / BYTES_PER_XTS_BLOCK;
|
||||||
|
|
||||||
// Process all blocks in the buffer
|
// Process all blocks in the buffer
|
||||||
while (blockCount > 0)
|
while (remainingBlocks > 0)
|
||||||
{
|
{
|
||||||
if (blockCount < BLOCKS_PER_XTS_DATA_UNIT)
|
if (remainingBlocks < BLOCKS_PER_XTS_DATA_UNIT)
|
||||||
endBlock = startBlock + (unsigned int) blockCount;
|
endBlock = startBlock + (unsigned int) remainingBlocks;
|
||||||
else
|
else
|
||||||
endBlock = BLOCKS_PER_XTS_DATA_UNIT;
|
endBlock = BLOCKS_PER_XTS_DATA_UNIT;
|
||||||
|
countBlock = endBlock - startBlock;
|
||||||
|
|
||||||
whiteningValuesPtr64 = finalInt64WhiteningValuesPtr;
|
whiteningValuesPtr64 = (unsigned __int64 *) whiteningValues;
|
||||||
whiteningValuePtr64 = (unsigned __int64 *) whiteningValue;
|
whiteningValuePtr64 = (unsigned __int64 *) whiteningValue;
|
||||||
|
|
||||||
// Encrypt the data unit number using the secondary key (in order to generate the first
|
// Encrypt the data unit number using the secondary key (in order to generate the first
|
||||||
@ -114,13 +145,13 @@ static void EncryptBufferXTSParallel (unsigned __int8 *buffer,
|
|||||||
EncipherBlock (cipher, whiteningValue, ks2);
|
EncipherBlock (cipher, whiteningValue, ks2);
|
||||||
|
|
||||||
// Generate subsequent whitening values for blocks in this data unit. Note that all generated 128-bit
|
// Generate subsequent whitening values for blocks in this data unit. Note that all generated 128-bit
|
||||||
// whitening values are stored in memory as a sequence of 64-bit integers in reverse order.
|
// whitening values are stored in memory as a sequence of 64-bit integers.
|
||||||
for (block = 0; block < endBlock; block++)
|
for (block = 0; block < endBlock; block++)
|
||||||
{
|
{
|
||||||
if (block >= startBlock)
|
if (block >= startBlock)
|
||||||
{
|
{
|
||||||
*whiteningValuesPtr64-- = *whiteningValuePtr64++;
|
*whiteningValuesPtr64++ = *whiteningValuePtr64++;
|
||||||
*whiteningValuesPtr64-- = *whiteningValuePtr64;
|
*whiteningValuesPtr64++ = *whiteningValuePtr64;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
whiteningValuePtr64++;
|
whiteningValuePtr64++;
|
||||||
@ -163,31 +194,37 @@ static void EncryptBufferXTSParallel (unsigned __int8 *buffer,
|
|||||||
}
|
}
|
||||||
|
|
||||||
dataUnitBufPtr = bufPtr;
|
dataUnitBufPtr = bufPtr;
|
||||||
whiteningValuesPtr64 = finalInt64WhiteningValuesPtr;
|
whiteningValuesPtr64 = (unsigned __int64 *) whiteningValues;
|
||||||
|
|
||||||
// Encrypt all blocks in this data unit
|
// Encrypt all blocks in this data unit
|
||||||
|
#if (CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_X64)
|
||||||
for (block = startBlock; block < endBlock; block++)
|
XorBlocks (bufPtr, whiteningValuesPtr64, countBlock, startBlock, endBlock);
|
||||||
|
#else
|
||||||
|
for (block = 0; block < countBlock; block++)
|
||||||
{
|
{
|
||||||
// Pre-whitening
|
// Pre-whitening
|
||||||
*bufPtr++ ^= *whiteningValuesPtr64--;
|
*bufPtr++ ^= *whiteningValuesPtr64++;
|
||||||
*bufPtr++ ^= *whiteningValuesPtr64--;
|
*bufPtr++ ^= *whiteningValuesPtr64++;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
// Actual encryption
|
// Actual encryption
|
||||||
EncipherBlocks (cipher, dataUnitBufPtr, ks, endBlock - startBlock);
|
EncipherBlocks (cipher, dataUnitBufPtr, ks, countBlock);
|
||||||
|
|
||||||
bufPtr = dataUnitBufPtr;
|
bufPtr = dataUnitBufPtr;
|
||||||
whiteningValuesPtr64 = finalInt64WhiteningValuesPtr;
|
whiteningValuesPtr64 = (unsigned __int64 *) whiteningValues;
|
||||||
|
|
||||||
for (block = startBlock; block < endBlock; block++)
|
#if (CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_X64)
|
||||||
|
XorBlocks (bufPtr, whiteningValuesPtr64, countBlock, startBlock, endBlock);
|
||||||
|
#else
|
||||||
|
for (block = 0; block < countBlock; block++)
|
||||||
{
|
{
|
||||||
// Post-whitening
|
// Post-whitening
|
||||||
*bufPtr++ ^= *whiteningValuesPtr64--;
|
*bufPtr++ ^= *whiteningValuesPtr64++;
|
||||||
*bufPtr++ ^= *whiteningValuesPtr64--;
|
*bufPtr++ ^= *whiteningValuesPtr64++;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
blockCount -= endBlock - startBlock;
|
remainingBlocks -= countBlock;
|
||||||
startBlock = 0;
|
startBlock = 0;
|
||||||
dataUnitNo++;
|
dataUnitNo++;
|
||||||
*((unsigned __int64 *) byteBufUnitNo) = LE64 (dataUnitNo);
|
*((unsigned __int64 *) byteBufUnitNo) = LE64 (dataUnitNo);
|
||||||
@ -256,15 +293,31 @@ static void EncryptBufferXTSNonParallel (unsigned __int8 *buffer,
|
|||||||
if (block >= startBlock)
|
if (block >= startBlock)
|
||||||
{
|
{
|
||||||
// Pre-whitening
|
// Pre-whitening
|
||||||
|
#if (CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_X64)
|
||||||
|
__m128i xmm1 = _mm_loadu_si128((const __m128i*)whiteningValuePtr64);
|
||||||
|
__m128i xmm2 = _mm_loadu_si128((__m128i*)bufPtr);
|
||||||
|
|
||||||
|
_mm_storeu_si128((__m128i*)bufPtr, _mm_xor_si128(xmm1, xmm2));
|
||||||
|
#else
|
||||||
*bufPtr++ ^= *whiteningValuePtr64++;
|
*bufPtr++ ^= *whiteningValuePtr64++;
|
||||||
*bufPtr-- ^= *whiteningValuePtr64--;
|
*bufPtr-- ^= *whiteningValuePtr64--;
|
||||||
|
#endif
|
||||||
// Actual encryption
|
// Actual encryption
|
||||||
EncipherBlock (cipher, bufPtr, ks);
|
EncipherBlock (cipher, bufPtr, ks);
|
||||||
|
|
||||||
// Post-whitening
|
// Post-whitening
|
||||||
|
#if (CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_X64)
|
||||||
|
xmm1 = _mm_loadu_si128((const __m128i*)whiteningValuePtr64);
|
||||||
|
xmm2 = _mm_loadu_si128((__m128i*)bufPtr);
|
||||||
|
|
||||||
|
_mm_storeu_si128((__m128i*)bufPtr, _mm_xor_si128(xmm1, xmm2));
|
||||||
|
|
||||||
|
whiteningValuePtr64++;
|
||||||
|
bufPtr += 2;
|
||||||
|
#else
|
||||||
*bufPtr++ ^= *whiteningValuePtr64++;
|
*bufPtr++ ^= *whiteningValuePtr64++;
|
||||||
*bufPtr++ ^= *whiteningValuePtr64;
|
*bufPtr++ ^= *whiteningValuePtr64;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
whiteningValuePtr64++;
|
whiteningValuePtr64++;
|
||||||
@ -349,9 +402,8 @@ static void DecryptBufferXTSParallel (unsigned __int8 *buffer,
|
|||||||
unsigned __int64 *whiteningValuePtr64 = (unsigned __int64 *) whiteningValue;
|
unsigned __int64 *whiteningValuePtr64 = (unsigned __int64 *) whiteningValue;
|
||||||
unsigned __int64 *bufPtr = (unsigned __int64 *) buffer;
|
unsigned __int64 *bufPtr = (unsigned __int64 *) buffer;
|
||||||
unsigned __int64 *dataUnitBufPtr;
|
unsigned __int64 *dataUnitBufPtr;
|
||||||
unsigned int startBlock = startCipherBlockNo, endBlock, block;
|
unsigned int startBlock = startCipherBlockNo, endBlock, block, countBlock;
|
||||||
unsigned __int64 *const finalInt64WhiteningValuesPtr = whiteningValuesPtr64 + sizeof (whiteningValues) / sizeof (*whiteningValuesPtr64) - 1;
|
TC_LARGEST_COMPILER_UINT remainingBlocks, dataUnitNo;
|
||||||
TC_LARGEST_COMPILER_UINT blockCount, dataUnitNo;
|
|
||||||
|
|
||||||
// Convert the 64-bit data unit number into a little-endian 16-byte array.
|
// Convert the 64-bit data unit number into a little-endian 16-byte array.
|
||||||
// Note that as we are converting a 64-bit number into a 16-byte array we can always zero the last 8 bytes.
|
// Note that as we are converting a 64-bit number into a 16-byte array we can always zero the last 8 bytes.
|
||||||
@ -362,17 +414,18 @@ static void DecryptBufferXTSParallel (unsigned __int8 *buffer,
|
|||||||
if (length % BYTES_PER_XTS_BLOCK)
|
if (length % BYTES_PER_XTS_BLOCK)
|
||||||
TC_THROW_FATAL_EXCEPTION;
|
TC_THROW_FATAL_EXCEPTION;
|
||||||
|
|
||||||
blockCount = length / BYTES_PER_XTS_BLOCK;
|
remainingBlocks = length / BYTES_PER_XTS_BLOCK;
|
||||||
|
|
||||||
// Process all blocks in the buffer
|
// Process all blocks in the buffer
|
||||||
while (blockCount > 0)
|
while (remainingBlocks > 0)
|
||||||
{
|
{
|
||||||
if (blockCount < BLOCKS_PER_XTS_DATA_UNIT)
|
if (remainingBlocks < BLOCKS_PER_XTS_DATA_UNIT)
|
||||||
endBlock = startBlock + (unsigned int) blockCount;
|
endBlock = startBlock + (unsigned int) remainingBlocks;
|
||||||
else
|
else
|
||||||
endBlock = BLOCKS_PER_XTS_DATA_UNIT;
|
endBlock = BLOCKS_PER_XTS_DATA_UNIT;
|
||||||
|
countBlock = endBlock - startBlock;
|
||||||
|
|
||||||
whiteningValuesPtr64 = finalInt64WhiteningValuesPtr;
|
whiteningValuesPtr64 = (unsigned __int64 *) whiteningValues;
|
||||||
whiteningValuePtr64 = (unsigned __int64 *) whiteningValue;
|
whiteningValuePtr64 = (unsigned __int64 *) whiteningValue;
|
||||||
|
|
||||||
// Encrypt the data unit number using the secondary key (in order to generate the first
|
// Encrypt the data unit number using the secondary key (in order to generate the first
|
||||||
@ -382,13 +435,13 @@ static void DecryptBufferXTSParallel (unsigned __int8 *buffer,
|
|||||||
EncipherBlock (cipher, whiteningValue, ks2);
|
EncipherBlock (cipher, whiteningValue, ks2);
|
||||||
|
|
||||||
// Generate subsequent whitening values for blocks in this data unit. Note that all generated 128-bit
|
// Generate subsequent whitening values for blocks in this data unit. Note that all generated 128-bit
|
||||||
// whitening values are stored in memory as a sequence of 64-bit integers in reverse order.
|
// whitening values are stored in memory as a sequence of 64-bit integers.
|
||||||
for (block = 0; block < endBlock; block++)
|
for (block = 0; block < endBlock; block++)
|
||||||
{
|
{
|
||||||
if (block >= startBlock)
|
if (block >= startBlock)
|
||||||
{
|
{
|
||||||
*whiteningValuesPtr64-- = *whiteningValuePtr64++;
|
*whiteningValuesPtr64++ = *whiteningValuePtr64++;
|
||||||
*whiteningValuesPtr64-- = *whiteningValuePtr64;
|
*whiteningValuesPtr64++ = *whiteningValuePtr64;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
whiteningValuePtr64++;
|
whiteningValuePtr64++;
|
||||||
@ -431,28 +484,33 @@ static void DecryptBufferXTSParallel (unsigned __int8 *buffer,
|
|||||||
}
|
}
|
||||||
|
|
||||||
dataUnitBufPtr = bufPtr;
|
dataUnitBufPtr = bufPtr;
|
||||||
whiteningValuesPtr64 = finalInt64WhiteningValuesPtr;
|
whiteningValuesPtr64 = (unsigned __int64 *) whiteningValues;
|
||||||
|
|
||||||
// Decrypt blocks in this data unit
|
// Decrypt blocks in this data unit
|
||||||
|
#if (CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_X64)
|
||||||
for (block = startBlock; block < endBlock; block++)
|
XorBlocks (bufPtr, whiteningValuesPtr64, countBlock, startBlock, endBlock);
|
||||||
|
#else
|
||||||
|
for (block = 0; block < countBlock; block++)
|
||||||
{
|
{
|
||||||
*bufPtr++ ^= *whiteningValuesPtr64--;
|
*bufPtr++ ^= *whiteningValuesPtr64++;
|
||||||
*bufPtr++ ^= *whiteningValuesPtr64--;
|
*bufPtr++ ^= *whiteningValuesPtr64++;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
DecipherBlocks (cipher, dataUnitBufPtr, ks, endBlock - startBlock);
|
DecipherBlocks (cipher, dataUnitBufPtr, ks, endBlock - startBlock);
|
||||||
|
|
||||||
bufPtr = dataUnitBufPtr;
|
bufPtr = dataUnitBufPtr;
|
||||||
whiteningValuesPtr64 = finalInt64WhiteningValuesPtr;
|
whiteningValuesPtr64 = (unsigned __int64 *) whiteningValues;
|
||||||
|
|
||||||
for (block = startBlock; block < endBlock; block++)
|
#if (CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_X64)
|
||||||
|
XorBlocks (bufPtr, whiteningValuesPtr64, countBlock, startBlock, endBlock);
|
||||||
|
#else
|
||||||
|
for (block = 0; block < countBlock; block++)
|
||||||
{
|
{
|
||||||
*bufPtr++ ^= *whiteningValuesPtr64--;
|
*bufPtr++ ^= *whiteningValuesPtr64++;
|
||||||
*bufPtr++ ^= *whiteningValuesPtr64--;
|
*bufPtr++ ^= *whiteningValuesPtr64++;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
blockCount -= endBlock - startBlock;
|
remainingBlocks -= countBlock;
|
||||||
startBlock = 0;
|
startBlock = 0;
|
||||||
dataUnitNo++;
|
dataUnitNo++;
|
||||||
|
|
||||||
@ -515,15 +573,31 @@ static void DecryptBufferXTSNonParallel (unsigned __int8 *buffer,
|
|||||||
if (block >= startBlock)
|
if (block >= startBlock)
|
||||||
{
|
{
|
||||||
// Post-whitening
|
// Post-whitening
|
||||||
|
#if (CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_X64)
|
||||||
|
__m128i xmm1 = _mm_loadu_si128((const __m128i*)whiteningValuePtr64);
|
||||||
|
__m128i xmm2 = _mm_loadu_si128((__m128i*)bufPtr);
|
||||||
|
|
||||||
|
_mm_storeu_si128((__m128i*)bufPtr, _mm_xor_si128(xmm1, xmm2));
|
||||||
|
#else
|
||||||
*bufPtr++ ^= *whiteningValuePtr64++;
|
*bufPtr++ ^= *whiteningValuePtr64++;
|
||||||
*bufPtr-- ^= *whiteningValuePtr64--;
|
*bufPtr-- ^= *whiteningValuePtr64--;
|
||||||
|
#endif
|
||||||
// Actual decryption
|
// Actual decryption
|
||||||
DecipherBlock (cipher, bufPtr, ks);
|
DecipherBlock (cipher, bufPtr, ks);
|
||||||
|
|
||||||
// Pre-whitening
|
// Pre-whitening
|
||||||
|
#if (CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && CRYPTOPP_BOOL_X64)
|
||||||
|
xmm1 = _mm_loadu_si128((const __m128i*)whiteningValuePtr64);
|
||||||
|
xmm2 = _mm_loadu_si128((__m128i*)bufPtr);
|
||||||
|
|
||||||
|
_mm_storeu_si128((__m128i*)bufPtr, _mm_xor_si128(xmm1, xmm2));
|
||||||
|
|
||||||
|
whiteningValuePtr64++;
|
||||||
|
bufPtr += 2;
|
||||||
|
#else
|
||||||
*bufPtr++ ^= *whiteningValuePtr64++;
|
*bufPtr++ ^= *whiteningValuePtr64++;
|
||||||
*bufPtr++ ^= *whiteningValuePtr64;
|
*bufPtr++ ^= *whiteningValuePtr64;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
whiteningValuePtr64++;
|
whiteningValuePtr64++;
|
||||||
|
Loading…
Reference in New Issue
Block a user