mirror of
https://github.com/veracrypt/VeraCrypt
synced 2024-11-14 23:23:30 +01:00
1445 lines
38 KiB
NASM
1445 lines
38 KiB
NASM
|
|
; ---------------------------------------------------------------------------
|
|
; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
|
|
;
|
|
; LICENSE TERMS
|
|
;
|
|
; The free distribution and use of this software is allowed (with or without
|
|
; changes) provided that:
|
|
;
|
|
; 1. source code distributions include the above copyright notice, this
|
|
; list of conditions and the following disclaimer;
|
|
;
|
|
; 2. binary distributions include the above copyright notice, this list
|
|
; of conditions and the following disclaimer in their documentation;
|
|
;
|
|
; 3. the name of the copyright holder is not used to endorse products
|
|
; built using this software without specific written permission.
|
|
;
|
|
; DISCLAIMER
|
|
;
|
|
; This software is provided 'as is' with no explicit or implied warranties
|
|
; in respect of its properties, including, but not limited to, correctness
|
|
; and/or fitness for purpose.
|
|
; ---------------------------------------------------------------------------
|
|
; Issue 20/12/2007
|
|
;
|
|
; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h
|
|
; and the same define to be set here as well. If ASM_X86_V2C is set this file
|
|
; requires the C files aeskey.c and aestab.c for support.
|
|
|
|
; An AES implementation for x86 processors using the YASM (or NASM) assembler.
|
|
; This is a full assembler implementation covering encryption, decryption and
|
|
; key scheduling. It uses 2k bytes of tables but its encryption and decryption
|
|
; performance is very close to that obtained using large tables. Key schedule
|
|
; expansion is slower for both encryption and decryption but this is likely to
|
|
; be offset by the much smaller load that this version places on the processor
|
|
; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
|
|
; the design of the AES round function used here.
|
|
;
|
|
; This code provides the standard AES block size (128 bits, 16 bytes) and the
|
|
; three standard AES key sizes (128, 192 and 256 bits). It has the same call
|
|
; interface as my C implementation. The ebx, esi, edi and ebp registers are
|
|
; preserved across calls but eax, ecx and edx and the arithmetic status flags
|
|
; are not. Although this is a full assembler implementation, it can be used
|
|
; in conjunction with my C code which provides faster key scheduling using
|
|
; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C
|
|
; defined. It is also important that the defines below match those used in the
|
|
; C code. This code uses the VC++ register saving conventions; if it is used
|
|
; with another compiler, conventions for using and saving registers may need
|
|
; to be checked (and calling conventions). The YASM command line for the VC++
|
|
; custom build step is:
|
|
;
|
|
; yasm -Xvc -f win32 -D <Z> -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
|
|
;
|
|
; For the cryptlib build this is (pcg):
|
|
;
|
|
; yasm -Xvc -f win32 -D ASM_X86_V2C -o aescrypt2.obj aes_x86_v2.asm
|
|
;
|
|
; where <Z> is ASM_X86_V2 or ASM_X86_V2C. The calling interfaces are:
|
|
;
|
|
; AES_RETURN aes_encrypt(const unsigned char in_blk[],
|
|
; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_decrypt(const unsigned char in_blk[],
|
|
; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
|
|
; const aes_encrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
|
|
; const aes_decrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_encrypt_key(const unsigned char key[],
|
|
; unsigned int len, const aes_encrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_decrypt_key(const unsigned char key[],
|
|
; unsigned int len, const aes_decrypt_ctx cx[1]);
|
|
;
|
|
; where <NNN> is 128, 192 or 256. In the last two calls the length can be in
|
|
; either bits or bytes.
|
|
|
|
; The DLL interface must use the _stdcall convention in which the number
|
|
; of bytes of parameter space is added after an @ to the subroutine's name.
|
|
; We must also remove our parameters from the stack before return (see
|
|
; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
|
|
|
|
;
|
|
; Adapted for TrueCrypt:
|
|
; - All tables generated at run-time
|
|
; - Adapted for 16-bit environment
|
|
;
|
|
|
|
; Assembler setup: restrict the instruction set to the 386 and assemble for a
; 16-bit code segment (32-bit registers are reached through size prefixes).
CPU 386
|
|
USE16
|
|
; Declare a code and a data segment, both 16-bit ...
SEGMENT _TEXT PUBLIC CLASS=CODE USE16
|
|
SEGMENT _DATA PUBLIC CLASS=DATA USE16
|
|
|
|
; ... and place both of them in the single group DGROUP.
GROUP DGROUP _TEXT _DATA
|
|
|
|
; The lookup tables are not defined by the code assembled here; they are
; imported (the ENCRYPTION_TABLE / DECRYPTION_TABLE sections stay disabled).
extern _aes_dec_tab ; Aestab.c
|
|
extern _aes_enc_tab
|
|
|
|
; %define DLL_EXPORT
|
|
|
|
; The size of the code can be reduced by using functions for the encryption
|
|
; and decryption rounds in place of macro expansion
|
|
|
|
; With REDUCE_CODE_SIZE set, the round functions and the l3s_col / inv_mix_col
; helpers are emitted once as subroutines and mf_call expands to a CALL.
%define REDUCE_CODE_SIZE
|
|
|
|
; Comment in/out the following lines to obtain the desired subroutines. These
|
|
; selections MUST match those in the C header file aes.h
|
|
|
|
; %define AES_128 ; define if AES with 128 bit keys is needed
|
|
; %define AES_192 ; define if AES with 192 bit keys is needed
|
|
; Only the 256-bit key size is enabled in this configuration. Note that the
; round-count check in the encrypt/decrypt routines is guarded by
; '%ifndef AES_256', so it is not assembled while this define is present.
%define AES_256 ; define if AES with 256 bit keys is needed
|
|
; %define AES_VAR ; define if a variable key size is needed
|
|
%define ENCRYPTION ; define if encryption is needed
|
|
%define DECRYPTION ; define if decryption is needed
|
|
; %define AES_REV_DKS ; define if key decryption schedule is reversed
|
|
|
|
; The assembler key schedules are only built when the C key schedule
; (ASM_X86_V2C) is not selected.
%ifndef ASM_X86_V2C
|
|
%define ENCRYPTION_KEY_SCHEDULE ; define if encryption key expansion is needed
|
|
%define DECRYPTION_KEY_SCHEDULE ; define if decryption key expansion is needed
|
|
%endif
|
|
|
|
; The encryption key schedule has the following in memory layout where N is the
|
|
; number of rounds (10, 12 or 14):
|
|
;
|
|
; lo: | input key (round 0) | ; each round is four 32-bit words
|
|
; | encryption round 1 |
|
|
; | encryption round 2 |
|
|
; ....
|
|
; | encryption round N-1 |
|
|
; hi: | encryption round N |
|
|
;
|
|
; The decryption key schedule is normally set up so that it has the same
|
|
; layout as above by actually reversing the order of the encryption key
|
|
; schedule in memory (this happens when AES_REV_DKS is set):
|
|
;
|
|
; lo: | decryption round 0 | = | encryption round N |
|
|
; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
|
|
; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
|
|
; .... ....
|
|
; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
|
|
; hi: | decryption round N | = | input key (round 0) |
|
|
;
|
|
; with rounds except the first and last modified using inv_mix_column()
|
|
; But if AES_REV_DKS is NOT set the order of keys is left as it is for
|
|
; encryption so that it has to be accessed in reverse when used for
|
|
; decryption (although the inverse mix column modifications are done)
|
|
;
|
|
; lo: | decryption round 0 | = | input key (round 0) |
|
|
; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
|
|
; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
|
|
; .... ....
|
|
; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
|
|
; hi: | decryption round N | = | encryption round N |
|
|
;
|
|
; This layout is faster when the assembler key scheduling provided here
|
|
; is used.
|
|
;
|
|
; End of user defines
|
|
|
|
; Selecting the variable-key-size interface implies all three fixed key sizes.
%ifdef AES_VAR
|
|
%ifndef AES_128
|
|
%define AES_128
|
|
%endif
|
|
%ifndef AES_192
|
|
%define AES_192
|
|
%endif
|
|
%ifndef AES_256
|
|
%define AES_256
|
|
%endif
|
|
%endif
|
|
|
|
; KS_LENGTH is the key schedule size in 32-bit words for the largest enabled
; key size: 60 = 15 round keys, 52 = 13 round keys, 44 = 11 round keys.
; The routines below keep the value (number of rounds * 16) in the context
; at byte offset 4*KS_LENGTH, directly after the schedule.
%ifdef AES_VAR
|
|
%define KS_LENGTH 60
|
|
%elifdef AES_256
|
|
%define KS_LENGTH 60
|
|
%elifdef AES_192
|
|
%define KS_LENGTH 52
|
|
%else
|
|
%define KS_LENGTH 44
|
|
%endif
|
|
|
|
; These macros implement stack based local variables
|
|
|
|
; save <slot>,<reg>
;   Store <reg> in stack slot <slot>, i.e. at [esp + 4*<slot>]. The macro does
;   not move ESP, so the slot must lie in space the caller has already reserved.
%macro save 2
|
|
mov [esp+4*%1],%2
|
|
%endmacro
|
|
|
|
; restore <reg>,<slot>
;   Reload <reg> from stack slot <slot>. Note that the operand order is the
;   reverse of 'save'.
%macro restore 2
|
|
mov %1,[esp+4*%2]
|
|
%endmacro
|
|
|
|
; mf_call <name>
;   Invoke a helper that exists either as a subroutine or as a macro:
;   with REDUCE_CODE_SIZE it emits 'call <name>', otherwise it emits <name>
;   itself so that the macro of that name is expanded inline.
%ifdef REDUCE_CODE_SIZE
|
|
%macro mf_call 1
|
|
call %1
|
|
%endmacro
|
|
%else
|
|
%macro mf_call 1
|
|
%1
|
|
%endmacro
|
|
%endif
|
|
|
|
; the DLL has to implement the _stdcall calling interface on return
|
|
; In this case we have to take our parameters (3 4-byte pointers)
|
|
; off the stack
|
|
|
|
; Default number of parameter bytes used by the three macros below when the
; optional second argument is omitted.
%define parms 12
|
|
|
|
; do_name <symbol>[,<bytes>]
;   Open a public entry point. Without DLL_EXPORT the plain symbol is made
;   global and defined as a label; with DLL_EXPORT the decorated name
;   <symbol>@<bytes> is made global, exported and defined instead.
%macro do_name 1-2 parms
|
|
%ifndef DLL_EXPORT
|
|
global %1
|
|
%1:
|
|
%else
|
|
global %1@%2
|
|
export %1@%2
|
|
%1@%2:
|
|
%endif
|
|
%endmacro
|
|
|
|
; do_call <symbol>[,<bytes>]
;   Call an entry point declared with do_name. Without DLL_EXPORT the caller
;   removes <bytes> of arguments from the stack after the call; with
;   DLL_EXPORT the decorated name is called and no stack adjustment is emitted
;   here (the callee returns with 'ret <bytes>', see do_exit).
;   NOTE: <bytes> is also part of the decorated name, so under DLL_EXPORT it
;   has to equal the value given to the matching do_name.
%macro do_call 1-2 parms
|
|
%ifndef DLL_EXPORT
|
|
call %1
|
|
add esp,%2
|
|
%else
|
|
call %1@%2
|
|
%endif
|
|
%endmacro
|
|
|
|
; do_exit [<bytes>]
;   Return from an entry point: 'ret <bytes>' (callee pops the parameters)
;   with DLL_EXPORT, a plain 'ret' otherwise.
%macro do_exit 0-1 parms
|
|
%ifdef DLL_EXPORT
|
|
ret %1
|
|
%else
|
|
ret
|
|
%endif
|
|
%endmacro
|
|
|
|
; finite field multiplies by {02}, {04} and {08}
|
|
|
|
; Assemble-time multiplication in GF(2^8) modulo the AES polynomial 0x11b.
; The argument is an 8-bit constant; every use of the argument is wrapped in
; parentheses so that an expression such as 'a^b' can be passed safely.
;
; f2, f4, f8: multiply by {02}, {04}, {08}. The value is shifted left and, for
; each bit that is shifted out above bit 7, the correspondingly shifted
; polynomial is XORed back in.

%define f2(x)   (((x)<<1)^((((x)>>7)&1)*0x11b))
%define f4(x)   (((x)<<2)^((((x)>>6)&1)*0x11b)^((((x)>>6)&2)*0x11b))
%define f8(x)   (((x)<<3)^((((x)>>5)&1)*0x11b)^((((x)>>5)&2)*0x11b)^((((x)>>5)&4)*0x11b))

; finite field multiplies required in table generation:
; {03}, {09}, {0b}, {0d} and {0e}, composed from the three above.

%define f3(x)   (f2(x) ^ (x))
%define f9(x)   (f8(x) ^ (x))
%define fb(x)   (f8(x) ^ f2(x) ^ (x))
%define fd(x)   (f8(x) ^ f4(x) ^ (x))
%define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
|
|
|
|
; Accessors for the encryption table. Each table entry is 8 bytes wide (index
; scaled by 8); x is a register holding the entry index. etab_0..etab_3 read
; the same entry at four different byte offsets (4, 3, 2, 1), which is how one
; 2 KiB table serves all four byte positions of a column.
; NOTE(review): per the disabled u8() generator further down in this file an
; entry is 0, s, s, f3(s), f2(s), s, s, f3(s); the live table is external and
; is presumably built with the same layout - confirm against Aestab.c.
%define etab_0(x) [_aes_enc_tab+4+8*x]
|
|
%define etab_1(x) [_aes_enc_tab+3+8*x]
|
|
%define etab_2(x) [_aes_enc_tab+2+8*x]
|
|
%define etab_3(x) [_aes_enc_tab+1+8*x]
|
|
; Single byte at offset 1 and 16-bit word at offset 0 of an entry.
%define etab_b(x) byte [_aes_enc_tab+1+8*x] ; used with movzx for 0x000000xx
|
|
%define etab_w(x) word [_aes_enc_tab+8*x] ; used with movzx for 0x0000xx00
|
|
|
|
; Alternative accessors (offsets 6, 5, 4, 3); only referenced by the disabled
; '%else' variant of lr_xor / lr_mov.
%define btab_0(x) [_aes_enc_tab+6+8*x]
|
|
%define btab_1(x) [_aes_enc_tab+5+8*x]
|
|
%define btab_2(x) [_aes_enc_tab+4+8*x]
|
|
%define btab_3(x) [_aes_enc_tab+3+8*x]
|
|
|
|
; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
|
|
; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
|
|
;
|
|
; Input:
|
|
;
|
|
; EAX column[0]
|
|
; EBX column[1]
|
|
; ECX column[2]
|
|
; EDX column[3]
|
|
; ESI column key[round][2]
|
|
; EDI column key[round][3]
|
|
; EBP scratch
|
|
;
|
|
; Output:
|
|
;
|
|
; EBP column[0] unkeyed
|
|
; EBX column[1] unkeyed
|
|
; ESI column[2] keyed
|
|
; EDI column[3] keyed
|
|
; EAX scratch
|
|
; ECX scratch
|
|
; EDX scratch
|
|
|
|
; rnd_fun <xor_op>,<mov_op>
;   Core of one encryption round. <xor_op> and <mov_op> are the names of
;   four-operand macros (dest, byte register, table number, scratch), e.g.
;   nr_xor/nr_mov for normal rounds and lr_xor/lr_mov for the last round.
;   Register usage on entry and exit is as described in the comment above.
;   The instruction order matters: a register is only reused as a scratch or
;   destination after every byte still needed from it has been consumed or
;   copied elsewhere.
%macro rnd_fun 2
|
|
|
|
; bring bytes 2,3 of column[1] down into bl/bh
rol ebx,16
|
|
; contributions to column[2] (esi) and column[3] (edi) that are reachable now
%1 esi, cl, 0, ebp
|
|
%1 esi, dh, 1, ebp
|
|
%1 esi, bh, 3, ebp
|
|
%1 edi, dl, 0, ebp
|
|
%1 edi, ah, 1, ebp
|
|
%1 edi, bl, 2, ebp
|
|
; start output column[0] in ebp (ebp is both destination and scratch)
%2 ebp, al, 0, ebp
|
|
; pack: eax = (upper half of column[0]) | (lower half of column[1]); this
; frees ebx for use as a scratch and, later, as the column[1] accumulator
shr ebx,16
|
|
and eax,0xffff0000
|
|
or eax,ebx
|
|
; bytes 2,3 of column[3] into dl/dh
shr edx,16
|
|
%1 ebp, ah, 1, ebx
|
|
%1 ebp, dh, 3, ebx
|
|
; start output column[1] in ebx; edx becomes the scratch from here on
%2 ebx, dl, 2, ebx
|
|
%1 ebx, ch, 1, edx
|
|
%1 ebx, al, 0, edx
|
|
; expose bytes 2,3 of column[0] (al/ah) and of column[2] (cl/ch)
shr eax,16
|
|
shr ecx,16
|
|
%1 ebp, cl, 2, edx
|
|
%1 edi, ch, 3, edx
|
|
%1 esi, al, 2, edx
|
|
%1 ebx, ah, 3, edx
|
|
|
|
%endmacro
|
|
|
|
; Basic MOV and XOR Operations for normal rounds
|
|
|
|
; nr_xor <dst>,<byte_reg>,<n>,<scratch>
;   dst ^= etab_<n>[byte_reg]. The byte is zero-extended into <scratch>, which
;   is then used as the table index; <scratch> is overwritten.
%macro nr_xor 4
|
|
movzx %4,%2
|
|
xor %1,etab_%3(%4)
|
|
%endmacro
|
|
|
|
; nr_mov <dst>,<byte_reg>,<n>,<scratch>
;   dst = etab_<n>[byte_reg]. <scratch> may be the same register as <dst>.
%macro nr_mov 4
|
|
movzx %4,%2
|
|
mov %1,etab_%3(%4)
|
|
%endmacro
|
|
|
|
; Basic MOV and XOR Operations for last round
|
|
|
|
; Last-round variants of nr_xor / nr_mov. They use a single table byte
; (etab_b) moved to byte position <n> instead of a full 32-bit table word.
; The '%if 1' selects the first implementation; the '%else' branch is kept as
; an alternative and is not assembled.
%if 1
|
|
|
|
; lr_xor <dst>,<byte_reg>,<n>,<scratch>
;   dst ^= etab_b[byte_reg] << (8*n); <scratch> is overwritten.
%macro lr_xor 4
|
|
movzx %4,%2
|
|
movzx %4,etab_b(%4)
|
|
%if %3 != 0
|
|
shl %4,8*%3
|
|
%endif
|
|
xor %1,%4
|
|
%endmacro
|
|
|
|
; lr_mov <dst>,<byte_reg>,<n>,<scratch>
;   dst = etab_b[byte_reg] << (8*n); <scratch> is overwritten.
%macro lr_mov 4
|
|
movzx %4,%2
|
|
movzx %1,etab_b(%4)
|
|
%if %3 != 0
|
|
shl %1,8*%3
|
|
%endif
|
|
%endmacro
|
|
|
|
%else ; less effective but worth leaving as an option
|
|
|
|
; Alternative: read a 32-bit word through btab_<n> and mask out byte <n>.
%macro lr_xor 4
|
|
movzx %4,%2
|
|
mov %4,btab_%3(%4)
|
|
and %4,0x000000ff << 8 * %3
|
|
xor %1,%4
|
|
%endmacro
|
|
|
|
%macro lr_mov 4
|
|
movzx %4,%2
|
|
mov %1,btab_%3(%4)
|
|
and %1,0x000000ff << 8 * %3
|
|
%endmacro
|
|
|
|
%endif
|
|
|
|
; Apply S-Box to the 4 bytes in a 32-bit word and rotate byte positions
|
|
|
|
; l3s_col - byte-wise table substitution of a 32-bit word, XORed into EDX.
;
;   in:   EAX = word to substitute
;         EDX = value the substituted word is XORed into
;   out:  EAX = EDX = EDX(in) ^ T(EAX), where every byte b of EAX is replaced
;         by the table byte etab_b[b] at the same byte position
;   uses: ECX (scratch); the upper half of EAX is shifted down on the way
;
; The instruction sequence is written once (l3s_col_body) and then wrapped
; either as a subroutine (REDUCE_CODE_SIZE) or as the macro 'l3s_col'; both
; forms are reached through mf_call.

%macro l3s_col_body 0

        movzx   ecx,al                  ; byte 0
        movzx   ecx,etab_b(ecx)
        xor     edx,ecx

        movzx   ecx,ah                  ; byte 1
        movzx   ecx,etab_b(ecx)
        shl     ecx,8
        xor     edx,ecx

        shr     eax,16                  ; bytes 2 and 3 are now al / ah

        movzx   ecx,al                  ; byte 2
        movzx   ecx,etab_b(ecx)
        shl     ecx,16
        xor     edx,ecx

        movzx   ecx,ah                  ; byte 3
        movzx   ecx,etab_b(ecx)
        shl     ecx,24
        xor     edx,ecx

        mov     eax,edx                 ; result is returned in eax

%endmacro

%ifdef REDUCE_CODE_SIZE

l3s_col:
        l3s_col_body
        ret

%else

%macro l3s_col 0

        l3s_col_body

%endmacro

%endif
|
|
|
|
; offsets to parameters
|
|
|
|
; Offsets of the three arguments relative to SP on entry to aes_encrypt /
; aes_decrypt. The routines read each one as a 16-bit word and add stk_spc
; because they access them after reserving their local frame.
in_blk equ 2 ; input byte array address parameter
|
|
out_blk equ 4 ; output byte array address parameter
|
|
ctx equ 6 ; AES context structure
|
|
; Size in bytes of the local frame: five 4-byte slots (slot 0 is the scratch
; slot used through save/restore, slots 1..4 hold edi, esi, ebx and ebp).
stk_spc equ 20 ; stack space
|
|
|
|
%ifdef ENCRYPTION
|
|
|
|
; %define ENCRYPTION_TABLE
|
|
|
|
; enc_round - one normal (non-final) encryption round.
;
;   in:   EAX, EBX, ECX, EDX = state columns 0..3
;         EBP = address of the round key used by the previous step
;   out:  EAX, EBX, ECX, EDX = state columns after this round
;         EBP = address of this round's key (advanced by 16)
;   uses: ESI, EDI, and one 4-byte stack slot to park EBP while rnd_fun uses
;         EBP as a work register
;
; The round itself is written once as enc_round_body <slot>, where <slot> is
; the save/restore slot number used for EBP:
;   * as a macro the body runs on the caller's frame and uses slot 0;
;   * as a subroutine the CALL has pushed a 2-byte return address in this
;     USE16 build, so SP is lowered by another 2 bytes and slot 1 is used,
;     which addresses the same memory as the caller's slot 0.

%macro enc_round_body 1

        add     ebp,16                  ; step to the next round key
        save    %1,ebp
        mov     esi,[ebp+8]             ; pre-load key words 2 and 3
        mov     edi,[ebp+12]

        rnd_fun nr_xor, nr_mov

        mov     eax,ebp                 ; column 0 (not yet keyed)
        mov     ecx,esi                 ; column 2 (already keyed)
        mov     edx,edi                 ; column 3 (already keyed)
        restore ebp,%1
        xor     eax,[ebp]               ; key columns 0 and 1
        xor     ebx,[ebp+4]

%endmacro

%ifdef REDUCE_CODE_SIZE

enc_round:
        sub     sp, 2
        enc_round_body 1
        add     sp, 2
        ret

%else

%macro enc_round 0

        enc_round_body 0

%endmacro

%endif
|
|
|
|
; enc_last_round - final encryption round (always expanded inline).
;   Same data flow as enc_round but built from lr_xor / lr_mov.
;   in:   EAX, EBX, ECX, EDX = state columns, EBP = previous round key address
;   out:  EAX, EBX, ESI, EDI = output columns 0..3 (columns 2 and 3 are left
;         in ESI/EDI rather than being copied back to ECX/EDX)
;   Uses stack slot 0 of the caller's frame to park EBP.
%macro enc_last_round 0
|
|
|
|
add ebp,16
|
|
save 0,ebp
|
|
mov esi,[ebp+8]
|
|
mov edi,[ebp+12]
|
|
|
|
rnd_fun lr_xor, lr_mov
|
|
|
|
mov eax,ebp
|
|
restore ebp,0
|
|
xor eax,[ebp]
|
|
xor ebx,[ebp+4]
|
|
|
|
%endmacro
|
|
|
|
section _TEXT
|
|
|
|
; AES Encryption Subroutine
|
|
|
|
; AES_RETURN aes_encrypt(const unsigned char in_blk[],
;                        unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
;
; Encrypts one 16-byte block.
;
; Arguments are read from the 16-bit stack frame as three 16-bit offsets
; (in_blk, out_blk, ctx relative to SP on entry) and are zero-extended before
; being used as 32-bit addresses.
;
; Returns EAX = 0. When the round-count dispatch below is assembled, EAX = -1
; is returned, and nothing is written to out_blk, if the round-count byte in
; the context (at offset 4*KS_LENGTH) is not 10*16, 12*16 or 14*16.
;
; EBX, ESI, EDI and EBP are preserved; EAX, ECX, EDX and the flags are not.
; The upper 16 bits of ESP are cleared so that [esp+...] addressing works.
;
; Local frame (stk_spc bytes): slot 0 = scratch for the key pointer (used by
; the round code through save/restore), then saved EDI, ESI, EBX, EBP.

        do_name _aes_encrypt,12

        mov     ax, sp                  ; esp = zero-extended sp
        movzx   esp, ax

        sub     esp,stk_spc
        mov     [esp+16],ebp
        mov     [esp+12],ebx
        mov     [esp+ 8],esi
        mov     [esp+ 4],edi

        movzx   esi,word [esp+in_blk+stk_spc]   ; input pointer
        mov     eax,[esi   ]
        mov     ebx,[esi+ 4]
        mov     ecx,[esi+ 8]
        mov     edx,[esi+12]

        movzx   ebp,word [esp+ctx+stk_spc]      ; key pointer
        movzx   edi,byte [ebp+4*KS_LENGTH]      ; 16 * number of rounds
        xor     eax,[ebp   ]                    ; initial round key
        xor     ebx,[ebp+ 4]
        xor     ecx,[ebp+ 8]
        xor     edx,[ebp+12]

; determine the number of rounds
;
; The dispatch may only be omitted when AES_256 is the ONLY enabled key size.
; The previous guard ('%ifndef AES_256') also dropped it when AES_128, AES_192
; or AES_VAR were enabled together with AES_256, in which case every key
; would have been processed with 14 rounds.

%ifdef AES_128
%define AES_ENC_ROUND_DISPATCH
%elifdef AES_192
%define AES_ENC_ROUND_DISPATCH
%elifndef AES_256
%define AES_ENC_ROUND_DISPATCH
%endif

%ifdef AES_ENC_ROUND_DISPATCH
        cmp     edi,10*16
        je      .3
        cmp     edi,12*16
        je      .2
        cmp     edi,14*16
        je      .1
        mov     eax,-1                  ; unknown round count
        jmp     .5
%endif

; 14 rounds enter at .1, 12 rounds at .2 and 10 rounds at .3

.1:     mf_call enc_round
        mf_call enc_round
.2:     mf_call enc_round
        mf_call enc_round
.3:     mf_call enc_round
        mf_call enc_round
        mf_call enc_round
        mf_call enc_round
        mf_call enc_round
        mf_call enc_round
        mf_call enc_round
        mf_call enc_round
        mf_call enc_round
        enc_last_round

; the last round leaves the output columns in eax, ebx, esi, edi

        movzx   edx,word [esp+out_blk+stk_spc]
        mov     [edx],eax
        mov     [edx+4],ebx
        mov     [edx+8],esi
        mov     [edx+12],edi
        xor     eax,eax

.5:     mov     ebp,[esp+16]
        mov     ebx,[esp+12]
        mov     esi,[esp+ 8]
        mov     edi,[esp+ 4]
        add     esp,stk_spc
        do_exit 12
|
|
|
|
%endif
|
|
|
|
; f_key <i>,<cycle>
;   Emit the code for one cycle of the encryption key expansion.
;     <i>     = cycle number, starting at 0
;     <cycle> = bytes per cycle: 16, 24 or 32 (128-, 192-, 256-bit keys)
;   Register state carried from one expansion to the next:
;     ESI, EDI, ECX, EDX = first four words of the previous cycle
;     EAX                = last word written so far
;     EBP                = base address; this cycle is stored at EBP + i*cycle
;   Uses the assemble-time variable rc_val as the round constant and replaces
;   it with f2(rc_val) at the end, so rc_val must be set (to 1) before the
;   first expansion of a key schedule.
%macro f_key 2
|
|
|
|
; esi = l3s_col(eax rotated right by 8 bits) ^ esi ^ rc_val.
; ecx/edx are saved around the helper because it uses them.
push ecx
|
|
push edx
|
|
mov edx,esi
|
|
ror eax,8
|
|
mf_call l3s_col
|
|
mov esi,eax
|
|
pop edx
|
|
pop ecx
|
|
xor esi,rc_val
|
|
|
|
; words 0..3 of the cycle: each is the previous new word XOR the word one
; cycle back (which is still held in the register)
mov [ebp+%1*%2],esi
|
|
xor edi,esi
|
|
mov [ebp+%1*%2+4],edi
|
|
xor ecx,edi
|
|
mov [ebp+%1*%2+8],ecx
|
|
xor edx,ecx
|
|
mov [ebp+%1*%2+12],edx
|
|
mov eax,edx
|
|
|
|
%if %2 == 24
|
|
|
|
; 192-bit keys: two more words per cycle, except in the last cycle (i = 7)
%if %1 < 7
|
|
xor eax,[ebp+%1*%2+16-%2]
|
|
mov [ebp+%1*%2+16],eax
|
|
xor eax,[ebp+%1*%2+20-%2]
|
|
mov [ebp+%1*%2+20],eax
|
|
%endif
|
|
|
|
%elif %2 == 32
|
|
|
|
; 256-bit keys: four more words per cycle, except in the last cycle (i = 6).
; Word 4 gets an extra l3s_col substitution (without rotation or constant).
%if %1 < 6
|
|
push ecx
|
|
push edx
|
|
mov edx,[ebp+%1*%2+16-%2]
|
|
mf_call l3s_col
|
|
pop edx
|
|
pop ecx
|
|
mov [ebp+%1*%2+16],eax
|
|
xor eax,[ebp+%1*%2+20-%2]
|
|
mov [ebp+%1*%2+20],eax
|
|
xor eax,[ebp+%1*%2+24-%2]
|
|
mov [ebp+%1*%2+24],eax
|
|
xor eax,[ebp+%1*%2+28-%2]
|
|
mov [ebp+%1*%2+28],eax
|
|
%endif
|
|
|
|
%endif
|
|
|
|
; next round constant: multiply by {02} in GF(2^8)
%assign rc_val f2(rc_val)
|
|
|
|
%endmacro
|
|
|
|
%ifdef ENCRYPTION_KEY_SCHEDULE
|
|
|
|
%ifdef AES_128
|
|
|
|
%ifndef ENCRYPTION_TABLE
|
|
; %define ENCRYPTION_TABLE
|
|
%endif
|
|
|
|
%assign rc_val 1
|
|
|
|
; AES_RETURN aes_encrypt_key128(key, cx)
;   Copies the 16-byte key into the schedule, stores 10*16 at offset
;   4*KS_LENGTH of the context and expands ten further round keys.
;   Returns EAX = 0; EBX, ESI, EDI, EBP are preserved.
;   Only assembled when AES_128 is defined.
;   NOTE(review): unlike _aes_encrypt_key256, this routine does not
;   zero-extend SP and reads its arguments as 32-bit values at [esp+20] and
;   [esp+24]. That looks like the original 32-bit frame rather than the 16-bit
;   one used elsewhere in this file - confirm before enabling AES_128.
do_name _aes_encrypt_key128,8
|
|
|
|
push ebp
|
|
push ebx
|
|
push esi
|
|
push edi
|
|
|
|
mov ebp,[esp+24]
|
|
mov [ebp+4*KS_LENGTH],dword 10*16
|
|
mov ebx,[esp+20]
|
|
|
|
; copy the key; esi, edi, ecx, edx keep the four words for f_key
mov esi,[ebx]
|
|
mov [ebp],esi
|
|
mov edi,[ebx+4]
|
|
mov [ebp+4],edi
|
|
mov ecx,[ebx+8]
|
|
mov [ebp+8],ecx
|
|
mov edx,[ebx+12]
|
|
mov [ebp+12],edx
|
|
add ebp,16
|
|
; eax = last key word, as expected by f_key
mov eax,edx
|
|
|
|
f_key 0,16 ; 11 * 4 = 44 unsigned longs
|
|
f_key 1,16 ; 4 + 4 * 10 generated = 44
|
|
f_key 2,16
|
|
f_key 3,16
|
|
f_key 4,16
|
|
f_key 5,16
|
|
f_key 6,16
|
|
f_key 7,16
|
|
f_key 8,16
|
|
f_key 9,16
|
|
|
|
pop edi
|
|
pop esi
|
|
pop ebx
|
|
pop ebp
|
|
xor eax,eax
|
|
do_exit 8
|
|
|
|
%endif
|
|
|
|
%ifdef AES_192
|
|
|
|
%ifndef ENCRYPTION_TABLE
|
|
; %define ENCRYPTION_TABLE
|
|
%endif
|
|
|
|
%assign rc_val 1
|
|
|
|
; AES_RETURN aes_encrypt_key192(key, cx)
;   Copies the 24-byte key into the schedule, stores 12*16 at offset
;   4*KS_LENGTH of the context and runs eight expansion cycles of 24 bytes
;   (the last one only produces four words, see f_key).
;   Returns EAX = 0; EBX, ESI, EDI, EBP are preserved.
;   Only assembled when AES_192 is defined.
;   NOTE(review): unlike _aes_encrypt_key256, this routine does not
;   zero-extend SP and reads its arguments as 32-bit values at [esp+20] and
;   [esp+24]. That looks like the original 32-bit frame rather than the 16-bit
;   one used elsewhere in this file - confirm before enabling AES_192.
do_name _aes_encrypt_key192,8
|
|
|
|
push ebp
|
|
push ebx
|
|
push esi
|
|
push edi
|
|
|
|
mov ebp,[esp+24]
|
|
mov [ebp+4*KS_LENGTH],dword 12 * 16
|
|
mov ebx,[esp+20]
|
|
|
|
; copy the key; esi, edi, ecx, edx keep words 0..3, eax ends as word 5
mov esi,[ebx]
|
|
mov [ebp],esi
|
|
mov edi,[ebx+4]
|
|
mov [ebp+4],edi
|
|
mov ecx,[ebx+8]
|
|
mov [ebp+8],ecx
|
|
mov edx,[ebx+12]
|
|
mov [ebp+12],edx
|
|
mov eax,[ebx+16]
|
|
mov [ebp+16],eax
|
|
mov eax,[ebx+20]
|
|
mov [ebp+20],eax
|
|
add ebp,24
|
|
|
|
f_key 0,24 ; 13 * 4 = 52 unsigned longs
|
|
f_key 1,24 ; 6 + 6 * 8 generated = 54
|
|
f_key 2,24
|
|
f_key 3,24
|
|
f_key 4,24
|
|
f_key 5,24
|
|
f_key 6,24
|
|
f_key 7,24
|
|
|
|
pop edi
|
|
pop esi
|
|
pop ebx
|
|
pop ebp
|
|
xor eax,eax
|
|
do_exit 8
|
|
|
|
%endif
|
|
|
|
%ifdef AES_256
|
|
|
|
%ifndef ENCRYPTION_TABLE
|
|
; %define ENCRYPTION_TABLE
|
|
%endif
|
|
|
|
%assign rc_val 1
|
|
|
|
; AES_RETURN aes_encrypt_key256(key, cx)
;   Builds the encryption key schedule for a 32-byte key: copies the key,
;   stores 14*16 at offset 4*KS_LENGTH of the context and runs seven expansion
;   cycles of 32 bytes (the last one only produces four words, see f_key).
;   Returns EAX = 0; EBX, ESI, EDI, EBP are preserved. EAX, ECX, EDX, the
;   flags and the upper 16 bits of ESP are modified.
;   Arguments are 16-bit offsets; after the four 4-byte pushes below and the
;   2-byte return address they are found at [esp+18] (key) and [esp+20] (ks).
do_name _aes_encrypt_key256,8
|
|
|
|
; esp = zero-extended sp so that [esp+...] addressing can be used
mov ax, sp
|
|
movzx esp, ax
|
|
|
|
push ebp
|
|
push ebx
|
|
push esi
|
|
push edi
|
|
|
|
movzx ebp, word [esp+20] ; ks
|
|
mov [ebp+4*KS_LENGTH],dword 14 * 16
|
|
movzx ebx, word [esp+18] ; key
|
|
|
|
; copy the key; esi, edi, ecx, edx keep words 0..3, eax ends as word 7
mov esi,[ebx]
|
|
mov [ebp],esi
|
|
mov edi,[ebx+4]
|
|
mov [ebp+4],edi
|
|
mov ecx,[ebx+8]
|
|
mov [ebp+8],ecx
|
|
mov edx,[ebx+12]
|
|
mov [ebp+12],edx
|
|
mov eax,[ebx+16]
|
|
mov [ebp+16],eax
|
|
mov eax,[ebx+20]
|
|
mov [ebp+20],eax
|
|
mov eax,[ebx+24]
|
|
mov [ebp+24],eax
|
|
mov eax,[ebx+28]
|
|
mov [ebp+28],eax
|
|
; ebp now addresses the first word to be generated
add ebp,32
|
|
|
|
f_key 0,32 ; 15 * 4 = 60 unsigned longs
|
|
f_key 1,32 ; 8 + 8 * 7 generated = 64
|
|
f_key 2,32
|
|
f_key 3,32
|
|
f_key 4,32
|
|
f_key 5,32
|
|
f_key 6,32
|
|
|
|
pop edi
|
|
pop esi
|
|
pop ebx
|
|
pop ebp
|
|
xor eax,eax
|
|
do_exit 8
|
|
|
|
%endif
|
|
|
|
%ifdef AES_VAR
|
|
|
|
%ifndef ENCRYPTION_TABLE
|
|
; %define ENCRYPTION_TABLE
|
|
%endif
|
|
|
|
; AES_RETURN aes_encrypt_key(key, len, cx)
;   Variable-length front end: dispatches to the 128/192/256-bit routine
;   according to len, which may be given in bytes (16, 24, 32) or in bits
;   (128, 192, 256). Any other length returns EAX = -1.
;   Only assembled when AES_VAR is defined.
;   NOTE(review): the arguments are read as 32-bit values at [esp+4], [esp+8]
;   and [esp+12] and re-pushed as 32-bit values, whereas _aes_encrypt_key256
;   in this file expects 16-bit arguments. This path looks un-adapted to the
;   16-bit build - confirm before enabling AES_VAR.
do_name _aes_encrypt_key,12
|
|
|
|
mov ecx,[esp+4]
|
|
mov eax,[esp+8]
|
|
mov edx,[esp+12]
|
|
; arguments for the selected fixed-size routine: (key, cx)
push edx
|
|
push ecx
|
|
|
|
cmp eax,16
|
|
je .1
|
|
cmp eax,128
|
|
je .1
|
|
|
|
cmp eax,24
|
|
je .2
|
|
cmp eax,192
|
|
je .2
|
|
|
|
cmp eax,32
|
|
je .3
|
|
cmp eax,256
|
|
je .3
|
|
; unsupported length: drop the two pushed arguments and fail
mov eax,-1
|
|
add esp,8
|
|
do_exit 12
|
|
|
|
.1: do_call _aes_encrypt_key128,8
|
|
do_exit 12
|
|
.2: do_call _aes_encrypt_key192,8
|
|
do_exit 12
|
|
.3: do_call _aes_encrypt_key256,8
|
|
do_exit 12
|
|
|
|
%endif
|
|
|
|
%endif
|
|
|
|
%ifdef ENCRYPTION_TABLE
|
|
|
|
; S-box data - 256 entries
|
|
|
|
section _DATA
|
|
|
|
%define u8(x) 0, x, x, f3(x), f2(x), x, x, f3(x)
|
|
|
|
_aes_enc_tab:
|
|
db u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5)
|
|
db u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76)
|
|
db u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0)
|
|
db u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0)
|
|
db u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc)
|
|
db u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15)
|
|
db u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a)
|
|
db u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75)
|
|
db u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0)
|
|
db u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84)
|
|
db u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b)
|
|
db u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf)
|
|
db u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85)
|
|
db u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8)
|
|
db u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5)
|
|
db u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2)
|
|
db u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17)
|
|
db u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73)
|
|
db u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88)
|
|
db u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb)
|
|
db u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c)
|
|
db u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79)
|
|
db u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9)
|
|
db u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08)
|
|
db u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6)
|
|
db u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a)
|
|
db u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e)
|
|
db u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e)
|
|
db u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94)
|
|
db u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf)
|
|
db u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68)
|
|
db u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16)
|
|
|
|
%endif
|
|
|
|
%ifdef DECRYPTION
|
|
|
|
; %define DECRYPTION_TABLE
|
|
|
|
; Accessors for the decryption table. Entries are 8 bytes wide (index scaled
; by 8); x is a register holding the entry index. dtab_0..dtab_3 read the same
; entry at byte offsets 0, 3, 2 and 1; dtab_x is the single byte at offset 7.
; NOTE(review): per the v8() generator at the end of this file an entry is
; fe, f9, fd, fb, fe, f9, fd, x of the inverse S-box value; the live table is
; external and is presumably built with the same layout - confirm.
%define dtab_0(x) [_aes_dec_tab+ 8*x]
|
|
%define dtab_1(x) [_aes_dec_tab+3+8*x]
|
|
%define dtab_2(x) [_aes_dec_tab+2+8*x]
|
|
%define dtab_3(x) [_aes_dec_tab+1+8*x]
|
|
%define dtab_x(x) byte [_aes_dec_tab+7+8*x]
|
|
|
|
; irn_fun <xor_op>,<mov_op>
;   Core of one decryption round; the counterpart of rnd_fun.
;   <xor_op>/<mov_op> are four-operand macros (dest, byte register, table
;   number, scratch): ni_xor/ni_mov for normal rounds, li_xor/li_mov for the
;   last round.
;   in:   EAX, EBX, ECX, EDX = state columns 0..3
;         ESI, EDI           = round key words 2 and 3 (pre-loaded)
;   out:  EAX = column 0 and EBP = column 1, both not yet keyed
;         ESI = column 2 and EDI = column 3, both keyed
;         EBX, ECX, EDX are left holding scratch values
;   The instruction order matters: a register is only reused after every byte
;   still needed from it has been consumed or copied elsewhere.
%macro irn_fun 2
|
|
|
|
; bring bytes 2,3 of column[0] down into al/ah
rol eax,16
|
|
%1 esi, cl, 0, ebp
|
|
%1 esi, bh, 1, ebp
|
|
%1 esi, al, 2, ebp
|
|
%1 edi, dl, 0, ebp
|
|
%1 edi, ch, 1, ebp
|
|
%1 edi, ah, 3, ebp
|
|
; start output column[1] in ebp (ebp is both destination and scratch)
%2 ebp, bl, 0, ebp
|
|
; pack: ebx = (upper half of column[1]) | (lower half of column[0]); this
; frees eax for use as a scratch and, later, as the column[0] accumulator
shr eax,16
|
|
and ebx,0xffff0000
|
|
or ebx,eax
|
|
; bytes 2,3 of column[2] into cl/ch
shr ecx,16
|
|
%1 ebp, bh, 1, eax
|
|
%1 ebp, ch, 3, eax
|
|
; start output column[0] in eax; ecx becomes the scratch from here on
%2 eax, cl, 2, ecx
|
|
%1 eax, bl, 0, ecx
|
|
%1 eax, dh, 1, ecx
|
|
; expose bytes 2,3 of column[1] (bl/bh) and of column[3] (dl/dh)
shr ebx,16
|
|
shr edx,16
|
|
%1 esi, dh, 3, ecx
|
|
%1 ebp, dl, 2, ecx
|
|
%1 eax, bh, 3, ecx
|
|
%1 edi, bl, 2, ecx
|
|
|
|
%endmacro
|
|
|
|
; Basic MOV and XOR Operations for normal rounds
|
|
|
|
; ni_xor <dst>,<byte_reg>,<n>,<scratch>
;   dst ^= dtab_<n>[byte_reg]. The byte is zero-extended into <scratch>, which
;   is then used as the table index; <scratch> is overwritten.
%macro ni_xor 4
|
|
movzx %4,%2
|
|
xor %1,dtab_%3(%4)
|
|
%endmacro
|
|
|
|
; ni_mov <dst>,<byte_reg>,<n>,<scratch>
;   dst = dtab_<n>[byte_reg]. <scratch> may be the same register as <dst>.
%macro ni_mov 4
|
|
movzx %4,%2
|
|
mov %1,dtab_%3(%4)
|
|
%endmacro
|
|
|
|
; Basic MOV and XOR Operations for last round
|
|
|
|
; li_xor <dst>,<byte_reg>,<n>,<scratch>
;   Last-round form: dst ^= dtab_x[byte_reg] << (8*n). Only the single table
;   byte is used; <scratch> is overwritten.
%macro li_xor 4
|
|
movzx %4,%2
|
|
movzx %4,dtab_x(%4)
|
|
%if %3 != 0
|
|
shl %4,8*%3
|
|
%endif
|
|
xor %1,%4
|
|
%endmacro
|
|
|
|
; li_mov <dst>,<byte_reg>,<n>,<scratch>
;   Last-round form: dst = dtab_x[byte_reg] << (8*n); <scratch> is overwritten.
%macro li_mov 4
|
|
movzx %4,%2
|
|
movzx %1,dtab_x(%4)
|
|
%if %3 != 0
|
|
shl %1,8*%3
|
|
%endif
|
|
%endmacro
|
|
|
|
; dec_round - one normal (non-final) decryption round.
;
;   in:   EAX, EBX, ECX, EDX = state columns 0..3
;         EBP = address of the round key used by the previous step
;   out:  EAX, EBX, ECX, EDX = state columns after this round
;         EBP = address of this round's key: advanced by 16 when the
;               decryption schedule is stored reversed (AES_REV_DKS),
;               moved back by 16 otherwise
;   uses: ESI, EDI, and one 4-byte stack slot to park EBP while irn_fun uses
;         EBP as a work register
;
; The round itself is written once as dec_round_body <slot>, where <slot> is
; the save/restore slot number used for EBP:
;   * as a macro the body runs on the caller's frame and uses slot 0;
;   * as a subroutine the CALL has pushed a 2-byte return address in this
;     USE16 build, so SP is lowered by another 2 bytes and slot 1 is used,
;     which addresses the same memory as the caller's slot 0.

%macro dec_round_body 1

%ifdef AES_REV_DKS
        add     ebp,16
%else
        sub     ebp,16
%endif
        save    %1,ebp
        mov     esi,[ebp+8]             ; pre-load key words 2 and 3
        mov     edi,[ebp+12]

        irn_fun ni_xor, ni_mov

        mov     ebx,ebp                 ; column 1 (not yet keyed)
        mov     ecx,esi                 ; column 2 (already keyed)
        mov     edx,edi                 ; column 3 (already keyed)
        restore ebp,%1
        xor     eax,[ebp]               ; key columns 0 and 1
        xor     ebx,[ebp+4]

%endmacro

%ifdef REDUCE_CODE_SIZE

dec_round:
        sub     sp, 2
        dec_round_body 1
        add     sp, 2
        ret

%else

%macro dec_round 0

        dec_round_body 0

%endmacro

%endif
|
|
|
|
; dec_last_round - final decryption round (always expanded inline).
;   Same data flow as dec_round but built from li_xor / li_mov.
;   in:   EAX, EBX, ECX, EDX = state columns, EBP = previous round key address
;   out:  EAX, EBX, ESI, EDI = output columns 0..3 (columns 2 and 3 are left
;         in ESI/EDI rather than being copied back to ECX/EDX)
;   Uses stack slot 0 of the caller's frame to park EBP.
%macro dec_last_round 0
|
|
|
|
; step to the next key in whichever direction the schedule is stored
%ifdef AES_REV_DKS
|
|
add ebp,16
|
|
%else
|
|
sub ebp,16
|
|
%endif
|
|
save 0,ebp
|
|
mov esi,[ebp+8]
|
|
mov edi,[ebp+12]
|
|
|
|
irn_fun li_xor, li_mov
|
|
|
|
mov ebx,ebp
|
|
restore ebp,0
|
|
xor eax,[ebp]
|
|
xor ebx,[ebp+4]
|
|
|
|
%endmacro
|
|
|
|
section _TEXT
|
|
|
|
; AES Decryption Subroutine
|
|
|
|
; AES_RETURN aes_decrypt(const unsigned char in_blk[],
;                        unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
;
; Decrypts one 16-byte block.
;
; Arguments are read from the 16-bit stack frame as three 16-bit offsets
; (in_blk, out_blk, ctx relative to SP on entry) and are zero-extended before
; being used as 32-bit addresses.
;
; Returns EAX = 0. When the round-count dispatch below is assembled, EAX = -1
; is returned, and nothing is written to out_blk, if the round-count byte in
; the context (at offset 4*KS_LENGTH) is not 10*16, 12*16 or 14*16.
;
; EBX, ESI, EDI and EBP are preserved; EAX, ECX, EDX and the flags are not.
; The upper 16 bits of ESP are cleared so that [esp+...] addressing works.
;
; Local frame (stk_spc bytes): slot 0 = scratch for the key pointer (used by
; the round code through save/restore), then saved EDI, ESI, EBX, EBP.

        do_name _aes_decrypt,12

        mov     ax, sp                  ; esp = zero-extended sp
        movzx   esp, ax

        sub     esp,stk_spc
        mov     [esp+16],ebp
        mov     [esp+12],ebx
        mov     [esp+ 8],esi
        mov     [esp+ 4],edi

; input four columns and xor in first round key

        movzx   esi,word [esp+in_blk+stk_spc]   ; input pointer
        mov     eax,[esi   ]
        mov     ebx,[esi+ 4]
        mov     ecx,[esi+ 8]
        mov     edx,[esi+12]
        lea     esi,[esi+16]            ; kept from the original; esi is
                                        ; reloaded before it is read again

        movzx   ebp, word [esp+ctx+stk_spc]     ; key pointer
        movzx   edi,byte[ebp+4*KS_LENGTH]       ; 16 * number of rounds
%ifndef AES_REV_DKS             ; if decryption key schedule is not reversed
        lea     ebp,[ebp+edi]   ; we have to access it from the top down
%endif
        xor     eax,[ebp   ]    ; key schedule
        xor     ebx,[ebp+ 4]
        xor     ecx,[ebp+ 8]
        xor     edx,[ebp+12]

; determine the number of rounds
;
; The dispatch may only be omitted when AES_256 is the ONLY enabled key size.
; The previous guard ('%ifndef AES_256') also dropped it when AES_128, AES_192
; or AES_VAR were enabled together with AES_256, in which case every key
; would have been processed with 14 rounds.

%ifdef AES_128
%define AES_DEC_ROUND_DISPATCH
%elifdef AES_192
%define AES_DEC_ROUND_DISPATCH
%elifndef AES_256
%define AES_DEC_ROUND_DISPATCH
%endif

%ifdef AES_DEC_ROUND_DISPATCH
        cmp     edi,10*16
        je      .3
        cmp     edi,12*16
        je      .2
        cmp     edi,14*16
        je      .1
        mov     eax,-1                  ; unknown round count
        jmp     .5
%endif

; 14 rounds enter at .1, 12 rounds at .2 and 10 rounds at .3

.1:     mf_call dec_round
        mf_call dec_round
.2:     mf_call dec_round
        mf_call dec_round
.3:     mf_call dec_round
        mf_call dec_round
        mf_call dec_round
        mf_call dec_round
        mf_call dec_round
        mf_call dec_round
        mf_call dec_round
        mf_call dec_round
        mf_call dec_round
        dec_last_round

; move final values to the output array.
; (the last round leaves the output columns in eax, ebx, esi, edi)

        movzx   ebp,word [esp+out_blk+stk_spc]
        mov     [ebp],eax
        mov     [ebp+4],ebx
        mov     [ebp+8],esi
        mov     [ebp+12],edi
        xor     eax,eax

.5:     mov     ebp,[esp+16]
        mov     ebx,[esp+12]
        mov     esi,[esp+ 8]
        mov     edi,[esp+ 4]
        add     esp,stk_spc
        do_exit 12
|
|
|
|
%endif
|
|
|
|
; inv_mix_col - transform one round-key word for the decryption key schedule.
;
;   in:   EDX = key word (EAX is not read; it is overwritten)
;   out:  EAX = dtab_0[e(b0)] ^ dtab_1[e(b1)] ^ dtab_2[e(b2)] ^ dtab_3[e(b3)]
;         where b0..b3 are the bytes of EDX (low to high) and e(b) is the
;         table byte etab_b[b]
;   uses: ECX (scratch); EDX is shifted right by 16 on the way
;
; Each byte is first mapped through etab_b and the result then indexes the
; decryption table. The existing comments in this file describe the net
; effect as the inverse mix column operation.
;
; The instruction sequence is written once (inv_mix_col_body) and then
; wrapped either as a subroutine (REDUCE_CODE_SIZE) or as the macro
; 'inv_mix_col'; both forms are reached through mf_call.

%macro inv_mix_col_body 0

        movzx   ecx,dl                  ; byte 0
        movzx   ecx,etab_b(ecx)
        mov     eax,dtab_0(ecx)

        movzx   ecx,dh                  ; byte 1 (fetched before edx is shifted)
        shr     edx,16                  ; bytes 2 and 3 are now dl / dh
        movzx   ecx,etab_b(ecx)
        xor     eax,dtab_1(ecx)

        movzx   ecx,dl                  ; byte 2
        movzx   ecx,etab_b(ecx)
        xor     eax,dtab_2(ecx)

        movzx   ecx,dh                  ; byte 3
        movzx   ecx,etab_b(ecx)
        xor     eax,dtab_3(ecx)

%endmacro

%ifdef REDUCE_CODE_SIZE

inv_mix_col:
        inv_mix_col_body
        ret

%else

%macro inv_mix_col 0

        inv_mix_col_body

%endmacro

%endif
|
|
|
|
%ifdef DECRYPTION_KEY_SCHEDULE
|
|
|
|
%ifdef AES_128
|
|
|
|
%ifndef DECRYPTION_TABLE
|
|
; %define DECRYPTION_TABLE
|
|
%endif
|
|
|
|
; AES_RETURN aes_decrypt_key128(key, cx)
;   Builds the encryption schedule with _aes_encrypt_key128 and then applies
;   inv_mix_col to every round key except the first and the last, finishing
;   in the shared tail at dec_end (which restores the registers and returns).
;   Only assembled when AES_128 is defined.
;   NOTE(review): unlike _aes_decrypt_key256, this routine does not
;   zero-extend SP, reads its arguments as 32-bit values at [esp+20] and
;   [esp+24] and passes 32-bit arguments on. That looks like the original
;   32-bit frame rather than the 16-bit one used by dec_end - confirm before
;   enabling AES_128.
do_name _aes_decrypt_key128,8
|
|
|
|
push ebp
|
|
push ebx
|
|
push esi
|
|
push edi
|
|
mov eax,[esp+24] ; context
|
|
mov edx,[esp+20] ; key
|
|
push eax
|
|
push edx
|
|
do_call _aes_encrypt_key128,8 ; generate expanded encryption key
|
|
mov eax,10*16
|
|
mov esi,[esp+24] ; pointer to first round key
|
|
lea edi,[esi+eax] ; pointer to last round key
|
|
add esi,32
|
|
; the inverse mix column transformation
|
|
mov edx,[esi-16] ; needs to be applied to all round keys
|
|
mf_call inv_mix_col ; except first and last. Hence start by
|
|
mov [esi-16],eax ; transforming the four sub-keys in the
|
|
mov edx,[esi-12] ; second round key
|
|
mf_call inv_mix_col
|
|
mov [esi-12],eax ; transformations for subsequent rounds
|
|
mov edx,[esi-8] ; can then be made more efficient by
|
|
mf_call inv_mix_col ; noting that for three of the four sub-keys
|
|
mov [esi-8],eax ; in the encryption round key ek[r]:
|
|
mov edx,[esi-4] ;
|
|
mf_call inv_mix_col ; ek[r][n] = ek[r][n-1] ^ ek[r-1][n]
|
|
mov [esi-4],eax ;
|
|
; where n is 1..3. Hence the corresponding
|
|
.0: mov edx,[esi] ; subkeys in the decryption round key dk[r]
|
|
mf_call inv_mix_col ; also obey since inv_mix_col is linear in
|
|
mov [esi],eax ; GF(256):
|
|
xor eax,[esi-12] ;
|
|
mov [esi+4],eax ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
|
|
xor eax,[esi-8] ;
|
|
mov [esi+8],eax ; So we only need one inverse mix column
|
|
xor eax,[esi-4] ; operation (n = 0) for each four word cycle
|
|
mov [esi+12],eax ; in the expanded key.
|
|
add esi,16
|
|
; loop until esi reaches the last round key, which is left unmodified
cmp edi,esi
|
|
jg .0
|
|
jmp dec_end
|
|
|
|
%endif
|
|
|
|
%ifdef AES_192
|
|
|
|
%ifndef DECRYPTION_TABLE
|
|
; %define DECRYPTION_TABLE
|
|
%endif
|
|
|
|
; AES_RETURN aes_decrypt_key192(key, cx)
;   Builds the encryption schedule with _aes_encrypt_key192 and then applies
;   inv_mix_col to every round key except the first and the last, finishing
;   in the shared tail at dec_end (which restores the registers and returns).
;   Only assembled when AES_192 is defined.
;   NOTE(review): unlike _aes_decrypt_key256, this routine does not
;   zero-extend SP, reads its arguments as 32-bit values at [esp+20] and
;   [esp+24] and passes 32-bit arguments on. That looks like the original
;   32-bit frame rather than the 16-bit one used by dec_end - confirm before
;   enabling AES_192.
do_name _aes_decrypt_key192,8
|
|
|
|
push ebp
|
|
push ebx
|
|
push esi
|
|
push edi
|
|
mov eax,[esp+24] ; context
|
|
mov edx,[esp+20] ; key
|
|
push eax
|
|
push edx
|
|
do_call _aes_encrypt_key192,8 ; generate expanded encryption key
|
|
mov eax,12*16
|
|
mov esi,[esp+24] ; first round key
|
|
lea edi,[esi+eax] ; last round key
|
|
add esi,48 ; the first 6 words are the key, of
|
|
; which the top 2 words are part of
|
|
mov edx,[esi-32] ; the second round key and hence
|
|
mf_call inv_mix_col ; need to be modified. After this we
|
|
mov [esi-32],eax ; need to do a further six values prior
|
|
mov edx,[esi-28] ; to using a more efficient technique
|
|
mf_call inv_mix_col ; based on:
|
|
mov [esi-28],eax ;
|
|
; dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
|
|
mov edx,[esi-24] ;
|
|
mf_call inv_mix_col ; for n = 1 .. 5 where the key expansion
|
|
mov [esi-24],eax ; cycle is now 6 words long
|
|
mov edx,[esi-20]
|
|
mf_call inv_mix_col
|
|
mov [esi-20],eax
|
|
mov edx,[esi-16]
|
|
mf_call inv_mix_col
|
|
mov [esi-16],eax
|
|
mov edx,[esi-12]
|
|
mf_call inv_mix_col
|
|
mov [esi-12],eax
|
|
mov edx,[esi-8]
|
|
mf_call inv_mix_col
|
|
mov [esi-8],eax
|
|
mov edx,[esi-4]
|
|
mf_call inv_mix_col
|
|
mov [esi-4],eax
|
|
|
|
.0: mov edx,[esi] ; the expanded key is 13 * 4 = 52 32-bit words
|
|
mf_call inv_mix_col ; of which 11 * 4 = 44 have to be modified
|
|
mov [esi],eax ; using inv_mix_col. We have already done 8
|
|
xor eax,[esi-20] ; of these so 36 are left - hence we need
|
|
mov [esi+4],eax ; exactly 6 loops of six here
|
|
xor eax,[esi-16]
|
|
mov [esi+8],eax
|
|
xor eax,[esi-12]
|
|
mov [esi+12],eax
|
|
xor eax,[esi-8]
|
|
mov [esi+16],eax
|
|
xor eax,[esi-4]
|
|
mov [esi+20],eax
|
|
add esi,24
|
|
; loop until esi reaches the last round key, which is left unmodified
cmp edi,esi
|
|
jg .0
|
|
jmp dec_end
|
|
|
|
%endif
|
|
|
|
%ifdef AES_256
|
|
|
|
%ifndef DECRYPTION_TABLE
|
|
; %define DECRYPTION_TABLE
|
|
%endif
|
|
|
|
; AES_RETURN aes_decrypt_key256(key, cx)
;   Builds the decryption key schedule for a 32-byte key: the encryption
;   schedule is generated by _aes_encrypt_key256 and inv_mix_col is then
;   applied to every round key except the first and the last. Execution falls
;   through into dec_end, which restores the registers and returns EAX = 0.
;   Arguments are 16-bit offsets; after the four 4-byte pushes below and the
;   2-byte return address they are found at [esp+18] (key) and [esp+20] (ks).
;   On leaving this part ESI = EDI = address of the last round key.
do_name _aes_decrypt_key256,8
|
|
|
|
; esp = zero-extended sp so that [esp+...] addressing can be used
mov ax, sp
|
|
movzx esp, ax
|
|
push ebp
|
|
push ebx
|
|
push esi
|
|
push edi
|
|
|
|
movzx eax, word [esp+20] ; ks
|
|
movzx edx, word [esp+18] ; key
|
|
; pass (key, ks) on as two 16-bit arguments; the '4' is the number of bytes
; do_call removes from the stack afterwards.
; NOTE(review): the entry point is declared with do_name ...,8, so under
; DLL_EXPORT the decorated names (@4 here, @8 there) would not match.
push ax
|
|
push dx
|
|
do_call _aes_encrypt_key256,4 ; generate expanded encryption key
|
|
mov eax,14*16
|
|
movzx esi, word [esp+20] ; ks
|
|
; edi = address of the last round key (loop limit)
lea edi,[esi+eax]
|
|
add esi,64
|
|
|
|
mov edx,[esi-48] ; the primary key is 8 words, of which
|
|
mf_call inv_mix_col ; the top four require modification
|
|
mov [esi-48],eax
|
|
mov edx,[esi-44]
|
|
mf_call inv_mix_col
|
|
mov [esi-44],eax
|
|
mov edx,[esi-40]
|
|
mf_call inv_mix_col
|
|
mov [esi-40],eax
|
|
mov edx,[esi-36]
|
|
mf_call inv_mix_col
|
|
mov [esi-36],eax
|
|
|
|
mov edx,[esi-32] ; the encryption key expansion cycle is
|
|
mf_call inv_mix_col ; now eight words long so we need to
|
|
mov [esi-32],eax ; start by doing one complete block
|
|
mov edx,[esi-28]
|
|
mf_call inv_mix_col
|
|
mov [esi-28],eax
|
|
mov edx,[esi-24]
|
|
mf_call inv_mix_col
|
|
mov [esi-24],eax
|
|
mov edx,[esi-20]
|
|
mf_call inv_mix_col
|
|
mov [esi-20],eax
|
|
mov edx,[esi-16]
|
|
mf_call inv_mix_col
|
|
mov [esi-16],eax
|
|
mov edx,[esi-12]
|
|
mf_call inv_mix_col
|
|
mov [esi-12],eax
|
|
mov edx,[esi-8]
|
|
mf_call inv_mix_col
|
|
mov [esi-8],eax
|
|
mov edx,[esi-4]
|
|
mf_call inv_mix_col
|
|
mov [esi-4],eax
|
|
|
|
.0: mov edx,[esi] ; we can now speed up the remaining
|
|
mf_call inv_mix_col ; rounds by using the technique
|
|
mov [esi],eax ; outlined earlier. But note that
|
|
xor eax,[esi-28] ; there is one extra inverse mix
|
|
mov [esi+4],eax ; column operation as the 256 bit
|
|
xor eax,[esi-24] ; key has an extra non-linear step
|
|
mov [esi+8],eax ; for the midway element.
|
|
xor eax,[esi-20]
|
|
mov [esi+12],eax ; the expanded key is 15 * 4 = 60
|
|
mov edx,[esi+16] ; 32-bit words of which 52 need to
|
|
mf_call inv_mix_col ; be modified. We have already done
|
|
mov [esi+16],eax ; 12 so 40 are left - which means
|
|
xor eax,[esi-12] ; that we need exactly 5 loops of 8
|
|
mov [esi+20],eax
|
|
xor eax,[esi-8]
|
|
mov [esi+24],eax
|
|
xor eax,[esi-4]
|
|
mov [esi+28],eax
|
|
add esi,32
|
|
; loop until esi reaches the last round key, which is left unmodified
cmp edi,esi
|
|
jg .0
|
|
%endif
|
|
|
|
dec_end:
; Shared tail of the decryption key set-up code. It optionally reverses the
; order of the round keys (AES_REV_DKS), pops the four registers pushed at
; routine entry, sets eax to 0 and leaves through do_exit.
|
|
|
|
%ifdef AES_REV_DKS
|
|
|
|
; esi = ks (first round key), reloaded from the stack. edi is not set here:
; it is used as left by the code that reached this label (the 256-bit
; routine leaves it at ks + 14*16).
movzx esi,word [esp+20] ; this reverses the order of the
|
|
; Each pass swaps the 16-byte round key at esi with the one at edi, eight
; bytes at a time, using eax/ebx and ebp/edx as temporaries.
.1: mov eax,[esi] ; round keys if required
|
|
mov ebx,[esi+4]
|
|
mov ebp,[edi]
|
|
mov edx,[edi+4]
|
|
mov [esi],ebp
|
|
mov [esi+4],edx
|
|
mov [edi],eax
|
|
mov [edi+4],ebx
|
|
|
|
; second half (bytes 8..15) of the same pair of round keys
mov eax,[esi+8]
|
|
mov ebx,[esi+12]
|
|
mov ebp,[edi+8]
|
|
mov edx,[edi+12]
|
|
mov [esi+8],ebp
|
|
mov [esi+12],edx
|
|
mov [edi+8],eax
|
|
mov [edi+12],ebx
|
|
|
|
; Move both pointers one round key towards the middle and continue while
; edi is still above esi (signed comparison).
add esi,16
|
|
sub edi,16
|
|
cmp edi,esi
|
|
jg .1
|
|
|
|
%endif
|
|
|
|
; Restore the saved registers in the reverse order of the entry pushes
; (ebp, ebx, esi, edi).
pop edi
|
|
pop esi
|
|
pop ebx
|
|
pop ebp
|
|
; eax = 0 on exit
xor eax,eax
|
|
; do_exit is a macro defined elsewhere in this file; the 8 matches the
; second operand of "do_name _aes_decrypt_key256,8".
do_exit 8
|
|
|
|
%ifdef AES_VAR
; _aes_decrypt_key - variable key length front end. It selects the 128, 192
; or 256-bit decryption key routine according to the second stack argument.
;
; Stack arguments as read below: [esp+4] -> ecx, [esp+8] -> eax (the key
; length), [esp+12] -> edx. ecx and edx are pushed unchanged as the two
; arguments of the selected routine.
; NOTE(review): presumably ecx is the key pointer and edx the key schedule
; pointer - confirm against the C prototype.
;
; The key length is accepted in bytes (16/24/32) or in bits (128/192/256).
; For any other value eax is set to -1, the two pushed arguments are
; dropped and the routine exits without calling anything.
;
; NOTE(review): this routine uses 32-bit argument slots ([esp+4]..[esp+12]
; and 32-bit pushes), whereas _aes_decrypt_key256 reads 16-bit word
; arguments at [esp+18]/[esp+20]. Confirm that the two conventions are
; compatible before enabling AES_VAR.
|
|
|
|
do_name _aes_decrypt_key,12
|
|
|
|
mov ecx,[esp+4]
|
|
mov eax,[esp+8]
|
|
mov edx,[esp+12]
|
|
; set up the two arguments for the size-specific routine (edx first, then
; ecx, so ecx ends up at the lower address)
push edx
|
|
push ecx
|
|
|
|
; 128-bit key: length given as 16 (bytes) or 128 (bits)
cmp eax,16
|
|
je .1
|
|
cmp eax,128
|
|
je .1
|
|
|
|
; 192-bit key: length given as 24 (bytes) or 192 (bits)
cmp eax,24
|
|
je .2
|
|
cmp eax,192
|
|
je .2
|
|
|
|
; 256-bit key: length given as 32 (bytes) or 256 (bits)
cmp eax,32
|
|
je .3
|
|
cmp eax,256
|
|
je .3
|
|
; unsupported key length: eax = -1, discard the two pushed dwords and exit
mov eax,-1
|
|
add esp,8
|
|
do_exit 12
|
|
|
|
; Dispatch targets. do_call and do_exit are macros defined elsewhere in
; this file; eax is not modified here between the call and do_exit.
.1: do_call _aes_decrypt_key128,8
|
|
do_exit 12
|
|
.2: do_call _aes_decrypt_key192,8
|
|
do_exit 12
|
|
.3: do_call _aes_decrypt_key256,8
|
|
do_exit 12
|
|
|
|
%endif
|
|
|
|
%endif
|
|
|
|
%ifdef DECRYPTION_TABLE
; Decryption lookup table; it is assembled only when DECRYPTION_TABLE is
; defined.
|
|
|
|
; Inverse S-box data - 256 entries
|
|
|
|
section _DATA
|
|
|
|
; v8(x) expands one inverse S-box value x into eight db operands:
; fe(x), f9(x), fd(x), fb(x), then fe(x), f9(x), fd(x) again, with the
; final slot holding x itself instead of a second fb(x).
; fe/f9/fd/fb are macros defined elsewhere in this file - presumably the
; products of x with 0x0e, 0x09, 0x0d and 0x0b in the AES finite field
; (TODO confirm against their definitions).
%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x
|
|
|
|
; 32 rows of 8 entries = 256 entries, listed in index order 0x00 .. 0xff;
; each entry is expanded by v8 into 8 db operands.
_aes_dec_tab:
|
|
db v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38)
|
|
db v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb)
|
|
db v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87)
|
|
db v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb)
|
|
db v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d)
|
|
db v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e)
|
|
db v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2)
|
|
db v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25)
|
|
db v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16)
|
|
db v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92)
|
|
db v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda)
|
|
db v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84)
|
|
db v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a)
|
|
db v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06)
|
|
db v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02)
|
|
db v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b)
|
|
db v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea)
|
|
db v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73)
|
|
db v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85)
|
|
db v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e)
|
|
db v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89)
|
|
db v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b)
|
|
db v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20)
|
|
db v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4)
|
|
db v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31)
|
|
db v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f)
|
|
db v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d)
|
|
db v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef)
|
|
db v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0)
|
|
db v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61)
|
|
db v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26)
|
|
db v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d)
|
|
|
|
%endif
|