From 7d1724e93b17c5095bb01c9f053600b3a85c3cdc Mon Sep 17 00:00:00 2001
From: Mounir IDRASSI
Date: Wed, 24 Jun 2020 22:29:09 +0200
Subject: [PATCH] Crypto: Fix random crash in Streebog in 32-bit mode, caused
 by use of the aligned SSE2 load instruction _mm_load_si128. We now use
 _mm_loadu_si128 instead, which is no slower than _mm_load_si128 on recent
 CPUs.

---
 src/Crypto/Streebog.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/Crypto/Streebog.c b/src/Crypto/Streebog.c
index e443ecae..9a261a68 100644
--- a/src/Crypto/Streebog.c
+++ b/src/Crypto/Streebog.c
@@ -1995,10 +1995,10 @@ VC_INLINE __m128i _mm_set_epi64x_a(uint64 i0, uint64 i1) {
 
 #define LOAD(P, xmm0, xmm1, xmm2, xmm3) { \
 	const __m128i *__m128p = (const __m128i *) &P[0]; \
-	xmm0 = _mm_load_si128(&__m128p[0]); \
-	xmm1 = _mm_load_si128(&__m128p[1]); \
-	xmm2 = _mm_load_si128(&__m128p[2]); \
-	xmm3 = _mm_load_si128(&__m128p[3]); \
+	xmm0 = _mm_loadu_si128(&__m128p[0]); \
+	xmm1 = _mm_loadu_si128(&__m128p[1]); \
+	xmm2 = _mm_loadu_si128(&__m128p[2]); \
+	xmm3 = _mm_loadu_si128(&__m128p[3]); \
 }
 
 #define UNLOAD(P, xmm0, xmm1, xmm2, xmm3) { \
@@ -2018,10 +2018,10 @@ VC_INLINE __m128i _mm_set_epi64x_a(uint64 i0, uint64 i1) {
 
 #define X128M(P, xmm0, xmm1, xmm2, xmm3) { \
 	const __m128i *__m128p = (const __m128i *) &P[0]; \
-	xmm0 = _mm_xor_si128(xmm0, _mm_load_si128(&__m128p[0])); \
-	xmm1 = _mm_xor_si128(xmm1, _mm_load_si128(&__m128p[1])); \
-	xmm2 = _mm_xor_si128(xmm2, _mm_load_si128(&__m128p[2])); \
-	xmm3 = _mm_xor_si128(xmm3, _mm_load_si128(&__m128p[3])); \
+	xmm0 = _mm_xor_si128(xmm0, _mm_loadu_si128(&__m128p[0])); \
+	xmm1 = _mm_xor_si128(xmm1, _mm_loadu_si128(&__m128p[1])); \
+	xmm2 = _mm_xor_si128(xmm2, _mm_loadu_si128(&__m128p[2])); \
+	xmm3 = _mm_xor_si128(xmm3, _mm_loadu_si128(&__m128p[3])); \
 }
 
 #define _mm_xor_64(mm0, mm1) _mm_xor_si64(mm0, _mm_cvtsi64_m64(mm1))
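
Background for reviewers: _mm_load_si128 compiles to MOVDQA, which requires
its source address to be 16-byte aligned and raises a general-protection
fault otherwise, while _mm_loadu_si128 (MOVDQU) accepts any alignment. In
32-bit builds the Streebog state buffers are not guaranteed 16-byte
alignment, and whether a given run crashes depends on where the allocator
happens to place them, which is consistent with the "random" crashes the
subject describes. The standalone C sketch below (not part of the patch;
the buffer and variable names are illustrative only) demonstrates the
difference on a deliberately misaligned pointer:

#include <emmintrin.h>  /* SSE2: __m128i, _mm_loadu_si128, _mm_storeu_si128 */
#include <stdio.h>

int main(void)
{
    /* 32 bytes of input; buf + 1 is misaligned even if buf itself
       happens to land on a 16-byte boundary. */
    unsigned char buf[32];
    unsigned char out[16];
    int i;

    for (i = 0; i < 32; i++)
        buf[i] = (unsigned char) i;

    /* A pointer that is NOT 16-byte aligned. */
    const __m128i *p = (const __m128i *) (buf + 1);

    /* Safe: _mm_loadu_si128 has no alignment requirement. */
    __m128i v = _mm_loadu_si128(p);

    /* Undefined behavior: _mm_load_si128 requires a 16-byte aligned
       address and faults on misaligned input; uncommenting this line
       reproduces the kind of crash the patch fixes. */
    /* __m128i w = _mm_load_si128(p); */

    _mm_storeu_si128((__m128i *) out, v);
    for (i = 0; i < 16; i++)
        printf("%02x ", out[i]);
    printf("\n");
    return 0;
}

On CPUs from roughly the Nehalem generation onward, MOVDQU on data that is
in fact aligned performs the same as MOVDQA, which is why switching the
macros to the unaligned form costs essentially nothing, as the subject
line notes.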