mirror of
https://github.com/veracrypt/VeraCrypt
synced 2024-11-30 23:13:31 +01:00
Crypto: Fix random crashes in Streebog on 32-bit builds, caused by using the aligned SSE2 load _mm_load_si128 on data that is not guaranteed to be 16-byte aligned. We now use _mm_loadu_si128 instead, which is no slower than _mm_load_si128 on recent CPUs.
This commit is contained in:
parent
cb1efdc9ae
commit
7d1724e93b
@ -1995,10 +1995,10 @@ VC_INLINE __m128i _mm_set_epi64x_a(uint64 i0, uint64 i1) {
|
|||||||
|
|
||||||
/*
 * LOAD: read 64 bytes starting at P into four XMM registers.
 *
 * Uses _mm_loadu_si128 (unaligned load) rather than _mm_load_si128:
 * P carries no 16-byte alignment guarantee (notably in 32-bit builds,
 * where default stack/heap alignment is below 16), and an aligned load
 * from a misaligned address raises a fault. On recent CPUs the
 * unaligned form costs nothing extra when the data happens to be
 * aligned.
 *
 * do { } while (0) keeps the macro safe as the body of an unbraced
 * if/else; argument parenthesization guards against expression
 * arguments. __m128p is deliberately an unlikely identifier so it
 * cannot shadow a caller variable referenced by P.
 */
#define LOAD(P, xmm0, xmm1, xmm2, xmm3) do { \
    const __m128i *__m128p = (const __m128i *) &(P)[0]; \
    (xmm0) = _mm_loadu_si128(&__m128p[0]); \
    (xmm1) = _mm_loadu_si128(&__m128p[1]); \
    (xmm2) = _mm_loadu_si128(&__m128p[2]); \
    (xmm3) = _mm_loadu_si128(&__m128p[3]); \
} while (0)
|
||||||
|
|
||||||
#define UNLOAD(P, xmm0, xmm1, xmm2, xmm3) { \
|
#define UNLOAD(P, xmm0, xmm1, xmm2, xmm3) { \
|
||||||
@ -2018,10 +2018,10 @@ VC_INLINE __m128i _mm_set_epi64x_a(uint64 i0, uint64 i1) {
|
|||||||
|
|
||||||
/*
 * X128M: XOR 64 bytes starting at P into four XMM registers
 * (xmm0..xmm3 ^= P[0..63], 16 bytes each).
 *
 * Uses _mm_loadu_si128 (unaligned load) rather than _mm_load_si128
 * for the same reason as LOAD: P is not guaranteed to be 16-byte
 * aligned (notably in 32-bit builds), and an aligned load from a
 * misaligned address faults. The unaligned form has no penalty on
 * recent CPUs when the data is in fact aligned.
 *
 * do { } while (0) keeps the macro safe in unbraced if/else bodies;
 * __m128p is deliberately an unlikely identifier so it cannot shadow
 * a caller variable referenced by P.
 */
#define X128M(P, xmm0, xmm1, xmm2, xmm3) do { \
    const __m128i *__m128p = (const __m128i *) &(P)[0]; \
    (xmm0) = _mm_xor_si128((xmm0), _mm_loadu_si128(&__m128p[0])); \
    (xmm1) = _mm_xor_si128((xmm1), _mm_loadu_si128(&__m128p[1])); \
    (xmm2) = _mm_xor_si128((xmm2), _mm_loadu_si128(&__m128p[2])); \
    (xmm3) = _mm_xor_si128((xmm3), _mm_loadu_si128(&__m128p[3])); \
} while (0)
|
||||||
|
|
||||||
/* XOR a 64-bit integer value into an MMX register: mm0 ^ mm1.
   _mm_cvtsi64_m64 moves the plain 64-bit integer mm1 into an __m64 so
   _mm_xor_si64 can combine it with mm0; the result is an __m64.
   NOTE(review): MMX code requires _mm_empty()/EMMS before any later
   x87 floating-point use — presumably handled by the caller outside
   this view; verify. */
#define _mm_xor_64(mm0, mm1) _mm_xor_si64(mm0, _mm_cvtsi64_m64(mm1))
|
||||||
|
Loading…
Reference in New Issue
Block a user