|
Packit |
40b132 |
; LICENSE:
|
|
Packit |
40b132 |
; This submission to NSS is to be made available under the terms of the
|
|
Packit |
40b132 |
; Mozilla Public License, v. 2.0. You can obtain one at
; http://mozilla.org/MPL/2.0/.
|
|
Packit |
40b132 |
;###############################################################################
|
|
Packit |
40b132 |
; Copyright(c) 2014, Intel Corp.
|
|
Packit |
40b132 |
; Developers and authors:
|
|
Packit |
40b132 |
; Shay Gueron and Vlad Krasnov
|
|
Packit |
40b132 |
; Intel Corporation, Israel Development Centre, Haifa, Israel
|
|
Packit |
40b132 |
; Please send feedback directly to crypto.feedback.alias@intel.com
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
.MODEL FLAT, C              ; flat memory model, C language type
.XMM                        ; enable XMM (SSE-class) instructions

.DATA
ALIGN 16                    ; the 128-bit key setup loads these with movdqa

; pshufb masks used by the key-schedule routines.  Each dword lane selects
; source bytes 13,14,15,12: the top dword of the source, rotated by one
; byte, copied into all four lanes.
Lmask       dd 0c0f0e0dh, 0c0f0e0dh, 0c0f0e0dh, 0c0f0e0dh

; Same idea for the 192-bit schedule, but selecting source bytes 5,6,7,4
; (the second dword of the source register).
Lmask192    dd 004070605h, 004070605h, 004070605h, 004070605h

; Mask used by the 256-bit schedule (same byte selection as Lmask).
Lmask256    dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh

; Initial round constant; the schedule routines double it with pslld.
Lcon1       dd 1, 1, 1, 1

; Round constant 1bh, loaded by the 128-bit schedule for its last two
; round keys (after Lcon1 has been doubled eight times).
Lcon2       dd 1bh, 1bh, 1bh, 1bh
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
.CODE

; Register roles used by the ECB, CBC and CTR generator macros below.
ctx         textequ <ecx>   ; context pointer; the generators advance it to round key 0
output      textequ <edx>   ; destination pointer
input       textequ <eax>   ; source pointer
inputLen    textequ <edi>   ; bytes remaining; pushed/popped by every generator
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; aes_rnd i
;   Applies encryption round i (aesenc) to the seven blocks in xmm0..xmm6.
;   In:       ctx -> round key 0; i = round index (assembly-time value)
;   Clobbers: xmm7 (loaded with round key i)
aes_rnd MACRO i
    movdqu  xmm7, [i*16 + ctx]
    aesenc  xmm0, xmm7
    aesenc  xmm1, xmm7
    aesenc  xmm2, xmm7
    aesenc  xmm3, xmm7
    aesenc  xmm4, xmm7
    aesenc  xmm5, xmm7
    aesenc  xmm6, xmm7
    ENDM
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; aes_last_rnd i
;   Applies the final encryption round (aesenclast) with round key i to the
;   seven blocks in xmm0..xmm6.
;   In:       ctx -> round key 0; i = round index (assembly-time value)
;   Clobbers: xmm7 (loaded with round key i)
aes_last_rnd MACRO i
    movdqu      xmm7, [i*16 + ctx]
    aesenclast  xmm0, xmm7
    aesenclast  xmm1, xmm7
    aesenclast  xmm2, xmm7
    aesenclast  xmm3, xmm7
    aesenclast  xmm4, xmm7
    aesenclast  xmm5, xmm7
    aesenclast  xmm6, xmm7
    ENDM
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; aes_dec_rnd i
;   Applies decryption round i (aesdec) to the seven blocks in xmm0..xmm6.
;   In:       ctx -> round key 0; i = round index (assembly-time value)
;   Clobbers: xmm7 (loaded with round key i)
aes_dec_rnd MACRO i
    movdqu  xmm7, [i*16 + ctx]
    aesdec  xmm0, xmm7
    aesdec  xmm1, xmm7
    aesdec  xmm2, xmm7
    aesdec  xmm3, xmm7
    aesdec  xmm4, xmm7
    aesdec  xmm5, xmm7
    aesdec  xmm6, xmm7
    ENDM
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; aes_dec_last_rnd i
;   Applies the final decryption round (aesdeclast) with round key i to the
;   seven blocks in xmm0..xmm6.
;   In:       ctx -> round key 0; i = round index (assembly-time value)
;   Clobbers: xmm7 (loaded with round key i)
aes_dec_last_rnd MACRO i
    movdqu      xmm7, [i*16 + ctx]
    aesdeclast  xmm0, xmm7
    aesdeclast  xmm1, xmm7
    aesdeclast  xmm2, xmm7
    aesdeclast  xmm3, xmm7
    aesdeclast  xmm4, xmm7
    aesdeclast  xmm5, xmm7
    aesdeclast  xmm6, xmm7
    ENDM
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; gen_aes_ecb_func enc, rnds
;   Expands to the complete body (prologue through ret) of an AES-ECB routine.
;     enc  = 1 selects aesenc/aesenclast; any other value selects
;            aesdec/aesdeclast
;     rnds = number of rounds; round keys 0..rnds are read
;   Stack arguments, as dword slots above the return address:
;     0 = context pointer, 1 = output, 4 = input, 5 = input length in bytes
;     (slots 2 and 3 are not read)
;   Round keys are read starting 44 bytes into the context.
;   Seven blocks are processed per pass while at least 7*16 bytes remain,
;   then one block per pass; a trailing partial block (< 16 bytes) is ignored.
;   Returns eax = 0.  Preserves edi; clobbers ecx, edx, xmm0-xmm7 and flags.
gen_aes_ecb_func MACRO enc, rnds

    LOCAL   loop7
    LOCAL   loop1
    LOCAL   bail

    ; Assembly-time selection of the round macros / instructions.
    IF enc eq 1
        rnd         textequ <aes_rnd>
        lastrnd     textequ <aes_last_rnd>
        aesinst     textequ <aesenc>
        aeslastinst textequ <aesenclast>
    ELSE
        rnd         textequ <aes_dec_rnd>
        lastrnd     textequ <aes_dec_last_rnd>
        aesinst     textequ <aesdec>
        aeslastinst textequ <aesdeclast>
    ENDIF

    push    inputLen                        ; edi is restored before ret

    ; 2*4 skips the saved edi and the return address.
    mov     ctx,      [esp + 2*4 + 0*4]
    mov     output,   [esp + 2*4 + 1*4]
    mov     input,    [esp + 2*4 + 4*4]
    mov     inputLen, [esp + 2*4 + 5*4]

    lea     ctx, [44+ctx]                   ; ctx -> round key 0

    ; ---- seven blocks per pass ----
loop7:
    cmp     inputLen, 7*16
    jb      loop1

    movdqu  xmm0, [0*16 + input]
    movdqu  xmm1, [1*16 + input]
    movdqu  xmm2, [2*16 + input]
    movdqu  xmm3, [3*16 + input]
    movdqu  xmm4, [4*16 + input]
    movdqu  xmm5, [5*16 + input]
    movdqu  xmm6, [6*16 + input]

    ; Round 0: XOR with the first round key.
    movdqu  xmm7, [0*16 + ctx]
    pxor    xmm0, xmm7
    pxor    xmm1, xmm7
    pxor    xmm2, xmm7
    pxor    xmm3, xmm7
    pxor    xmm4, xmm7
    pxor    xmm5, xmm7
    pxor    xmm6, xmm7

    ; Rounds 1..rnds-1, unrolled at assembly time, then the final round.
    i = 1
    WHILE i LT rnds
        rnd i
        i = i+1
        ENDM
    lastrnd rnds

    movdqu  [0*16 + output], xmm0
    movdqu  [1*16 + output], xmm1
    movdqu  [2*16 + output], xmm2
    movdqu  [3*16 + output], xmm3
    movdqu  [4*16 + output], xmm4
    movdqu  [5*16 + output], xmm5
    movdqu  [6*16 + output], xmm6

    lea     input,  [7*16 + input]
    lea     output, [7*16 + output]
    sub     inputLen, 7*16
    jmp     loop7

    ; ---- one block per pass ----
loop1:
    cmp     inputLen, 1*16
    jb      bail

    movdqu  xmm0, [input]
    movdqu  xmm7, [0*16 + ctx]
    pxor    xmm0, xmm7

    i = 1
    WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesinst xmm0, xmm7
        i = i+1
        ENDM
    movdqu  xmm7, [rnds*16 + ctx]
    aeslastinst xmm0, xmm7

    movdqu  [output], xmm0

    lea     input,  [1*16 + input]
    lea     output, [1*16 + output]
    sub     inputLen, 1*16
    jmp     loop1

bail:
    xor     eax, eax                        ; return 0
    pop     inputLen
    ret

ENDM
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-128 ECB encryption (10 rounds); see gen_aes_ecb_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_encrypt_ecb_128 PROC
    gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-192 ECB encryption (12 rounds); see gen_aes_ecb_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_encrypt_ecb_192 PROC
    gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-256 ECB encryption (14 rounds); see gen_aes_ecb_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_encrypt_ecb_256 PROC
    gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-128 ECB decryption (10 rounds); see gen_aes_ecb_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_decrypt_ecb_128 PROC
    gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-192 ECB decryption (12 rounds); see gen_aes_ecb_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_decrypt_ecb_192 PROC
    gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-256 ECB decryption (14 rounds); see gen_aes_ecb_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_decrypt_ecb_256 PROC
    gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; Register roles used by the key-schedule (init) routines below.
KEY         textequ <ecx>   ; pointer to the raw cipher key
KS          textequ <edx>   ; pointer into the key schedule being written
ITR         textequ <eax>   ; scratch: constant address, then loop counter
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; intel_aes_encrypt_init_128
;   Expands a 16-byte key into 11 round keys (11*16 bytes).
;   Stack arguments: [esp+4] = key pointer, [esp+8] = key-schedule pointer.
;   Clobbers: eax, ecx, edx, xmm0-xmm4, flags.  eax is not a meaningful
;   return value.
ALIGN 16
intel_aes_encrypt_init_128 PROC

    mov     KEY, [esp + 1*4 + 0*4]
    mov     KS,  [esp + 1*4 + 1*4]

    ; Round key 0 is the cipher key itself.
    movdqu  xmm1, [KEY]
    movdqu  [KS], xmm1
    movdqa  xmm2, xmm1

    lea     ITR, Lcon1
    movdqa  xmm0, [ITR]             ; xmm0 = round constant
    lea     ITR, Lmask
    movdqa  xmm4, [ITR]             ; xmm4 = pshufb mask

    mov     ITR, 8                  ; round keys 1..8 come from the loop

Lenc_128_ks_loop:
    lea     KS, [16 + KS]
    dec     ITR                     ; ZF is consumed by the jne at the bottom;
                                    ; no instruction in between writes EFLAGS

    pshufb  xmm2, xmm4              ; rotated top dword in every lane
    aesenclast xmm2, xmm0           ; byte substitution + round constant
    pslld   xmm0, 1                 ; next round constant
    ; Prefix-XOR of the previous round key's four dwords.
    movdqa  xmm3, xmm1
    pslldq  xmm3, 4
    pxor    xmm1, xmm3
    pslldq  xmm3, 4
    pxor    xmm1, xmm3
    pslldq  xmm3, 4
    pxor    xmm1, xmm3
    pxor    xmm1, xmm2
    movdqu  [KS], xmm1
    movdqa  xmm2, xmm1

    jne     Lenc_128_ks_loop

    ; Round constant restarts at 1bh for the last two round keys.
    lea     ITR, Lcon2
    movdqa  xmm0, [ITR]

    ; Round key 9.
    pshufb  xmm2, xmm4
    aesenclast xmm2, xmm0
    pslld   xmm0, 1
    movdqa  xmm3, xmm1
    pslldq  xmm3, 4
    pxor    xmm1, xmm3
    pslldq  xmm3, 4
    pxor    xmm1, xmm3
    pslldq  xmm3, 4
    pxor    xmm1, xmm3
    pxor    xmm1, xmm2
    movdqu  [16 + KS], xmm1
    movdqa  xmm2, xmm1

    ; Round key 10.
    pshufb  xmm2, xmm4
    aesenclast xmm2, xmm0
    movdqa  xmm3, xmm1
    pslldq  xmm3, 4
    pxor    xmm1, xmm3
    pslldq  xmm3, 4
    pxor    xmm1, xmm3
    pslldq  xmm3, 4
    pxor    xmm1, xmm3
    pxor    xmm1, xmm2
    movdqu  [32 + KS], xmm1
    movdqa  xmm2, xmm1

    ret
intel_aes_encrypt_init_128 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; intel_aes_decrypt_init_128
;   Builds the decryption key schedule for a 16-byte key: generates the
;   encryption schedule, reverses the order of the 11 round keys, and runs
;   aesimc over round keys 1..9.
;   Stack arguments: [esp+4] = key pointer, [esp+8] = key-schedule pointer.
;   Clobbers: eax, ecx, edx, xmm0-xmm4, flags.
ALIGN 16
intel_aes_decrypt_init_128 PROC

    mov     KEY, [esp + 1*4 + 0*4]
    mov     KS,  [esp + 1*4 + 1*4]

    ; Pass (KEY, KS) on the stack; the pops afterwards also restore both
    ; registers, which the callee modifies.
    push    KS
    push    KEY

    call    intel_aes_encrypt_init_128

    pop     KEY
    pop     KS

    ; First and last round keys swap places unchanged.
    movdqu  xmm0, [0*16 + KS]
    movdqu  xmm1, [10*16 + KS]
    movdqu  [10*16 + KS], xmm0
    movdqu  [0*16 + KS], xmm1

    ; Keys i and 10-i swap places, each passed through aesimc.
    i = 1
    WHILE i LT 5
        movdqu  xmm0, [i*16 + KS]
        movdqu  xmm1, [(10-i)*16 + KS]

        aesimc  xmm0, xmm0
        aesimc  xmm1, xmm1

        movdqu  [(10-i)*16 + KS], xmm0
        movdqu  [i*16 + KS], xmm1

        i = i+1
        ENDM

    ; The middle key stays in place and only gets aesimc.
    movdqu  xmm0, [5*16 + KS]
    aesimc  xmm0, xmm0
    movdqu  [5*16 + KS], xmm0
    ret
intel_aes_decrypt_init_128 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; intel_aes_encrypt_init_192
;   Expands a 24-byte key into 13 round keys.
;   Stack arguments: [esp+4] = key pointer, [esp+8] = key-schedule pointer.
;   Register roles: xmm1 = latest four schedule dwords, xmm3 (low qword) =
;   latest two schedule dwords, xmm5 = two dwords waiting to be stored,
;   xmm0 = round constant, xmm4 = pshufb mask.
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags.
ALIGN 16
intel_aes_encrypt_init_192 PROC

    mov     KEY, [esp + 1*4 + 0*4]
    mov     KS,  [esp + 1*4 + 1*4]

    ; Load the 24-byte key: 16 bytes into xmm1, the last 8 into xmm3's
    ; low qword.
    pxor    xmm3, xmm3
    movdqu  xmm1, [KEY]
    pinsrd  xmm3, DWORD PTR [16 + KEY], 0
    pinsrd  xmm3, DWORD PTR [20 + KEY], 1

    movdqu  [KS], xmm1
    movdqa  xmm5, xmm3

    lea     ITR, Lcon1
    movdqu  xmm0, [ITR]
    lea     ITR, Lmask192
    movdqu  xmm4, [ITR]

    mov     ITR, 4                  ; each pass stores 48 bytes of schedule

Lenc_192_ks_loop:
    ; ---- first expansion step of the pass ----
    movdqa  xmm2, xmm3
    pshufb  xmm2, xmm4
    aesenclast xmm2, xmm0
    pslld   xmm0, 1

    movdqa  xmm6, xmm1
    movdqa  xmm7, xmm3
    pslldq  xmm6, 4
    pslldq  xmm7, 4
    pxor    xmm1, xmm6
    pxor    xmm3, xmm7
    pslldq  xmm6, 4
    pxor    xmm1, xmm6
    pslldq  xmm6, 4
    pxor    xmm1, xmm6
    pxor    xmm1, xmm2
    pshufd  xmm2, xmm1, 0ffh        ; broadcast the newest dword of xmm1
    pxor    xmm3, xmm2

    ; Repack into 16-byte round keys:
    ;   xmm5 = pending two dwords | low half of xmm1
    ;   xmm6 = high half of xmm1  | low half of xmm3
    movdqa  xmm6, xmm1
    shufpd  xmm5, xmm1, 00h
    shufpd  xmm6, xmm3, 01h

    movdqu  [16 + KS], xmm5
    movdqu  [32 + KS], xmm6

    ; ---- second expansion step of the pass ----
    movdqa  xmm2, xmm3
    pshufb  xmm2, xmm4
    aesenclast xmm2, xmm0
    pslld   xmm0, 1

    movdqa  xmm6, xmm1
    movdqa  xmm7, xmm3
    pslldq  xmm6, 4
    pslldq  xmm7, 4
    pxor    xmm1, xmm6
    pxor    xmm3, xmm7
    pslldq  xmm6, 4
    pxor    xmm1, xmm6
    pslldq  xmm6, 4
    pxor    xmm1, xmm6
    pxor    xmm1, xmm2
    pshufd  xmm2, xmm1, 0ffh
    pxor    xmm3, xmm2

    movdqu  [48 + KS], xmm1
    movdqa  xmm5, xmm3              ; carry two dwords into the next pass

    lea     KS, [48 + KS]

    dec     ITR
    jnz     Lenc_192_ks_loop

    ; NOTE(review): this store lands at byte offset 13*16 of the schedule,
    ; i.e. just past round key 12, and only its low 8 bytes are meaningful.
    ; Confirm the caller's schedule buffer has room for it.
    movdqu  [16 + KS], xmm5
    ret
intel_aes_encrypt_init_192 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; intel_aes_decrypt_init_192
;   Builds the decryption key schedule for a 24-byte key: generates the
;   encryption schedule, reverses the order of the 13 round keys, and runs
;   aesimc over round keys 1..11.
;   Stack arguments: [esp+4] = key pointer, [esp+8] = key-schedule pointer.
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags.
ALIGN 16
intel_aes_decrypt_init_192 PROC
    mov     KEY, [esp + 1*4 + 0*4]
    mov     KS,  [esp + 1*4 + 1*4]

    ; Pass (KEY, KS) on the stack; the pops afterwards also restore both
    ; registers, which the callee modifies.
    push    KS
    push    KEY

    call    intel_aes_encrypt_init_192

    pop     KEY
    pop     KS

    ; First and last round keys swap places unchanged.
    movdqu  xmm0, [0*16 + KS]
    movdqu  xmm1, [12*16 + KS]
    movdqu  [12*16 + KS], xmm0
    movdqu  [0*16 + KS], xmm1

    ; Keys i and 12-i swap places, each passed through aesimc.
    i = 1
    WHILE i LT 6
        movdqu  xmm0, [i*16 + KS]
        movdqu  xmm1, [(12-i)*16 + KS]

        aesimc  xmm0, xmm0
        aesimc  xmm1, xmm1

        movdqu  [(12-i)*16 + KS], xmm0
        movdqu  [i*16 + KS], xmm1

        i = i+1
        ENDM

    ; The middle key stays in place and only gets aesimc.
    movdqu  xmm0, [6*16 + KS]
    aesimc  xmm0, xmm0
    movdqu  [6*16 + KS], xmm0
    ret
intel_aes_decrypt_init_192 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; intel_aes_encrypt_init_256
;   Expands a 32-byte key into 15 round keys.
;   Stack arguments: [esp+4] = key pointer, [esp+8] = key-schedule pointer.
;   Register roles: xmm1 / xmm3 = the two most recent round keys (even / odd),
;   xmm0 = round constant, xmm5 = pshufb mask, xmm6 = zero.
;   Clobbers: eax, ecx, edx, xmm0-xmm6, flags.
ALIGN 16
intel_aes_encrypt_init_256 PROC

    mov     KEY, [esp + 1*4 + 0*4]
    mov     KS,  [esp + 1*4 + 1*4]
    movdqu  xmm1, [16*0 + KEY]
    movdqu  xmm3, [16*1 + KEY]

    ; Round keys 0 and 1 are the cipher key itself.
    movdqu  [16*0 + KS], xmm1
    movdqu  [16*1 + KS], xmm3

    lea     ITR, Lcon1
    movdqu  xmm0, [ITR]
    lea     ITR, Lmask256
    movdqu  xmm5, [ITR]

    pxor    xmm6, xmm6              ; all-zero "round key" for the odd step

    mov     ITR, 6                  ; each pass produces two round keys (2..13)

Lenc_256_ks_loop:

    ; Even round key: rotated + substituted top dword of xmm3, plus the
    ; round constant.
    movdqa  xmm2, xmm3
    pshufb  xmm2, xmm5
    aesenclast xmm2, xmm0
    pslld   xmm0, 1
    movdqa  xmm4, xmm1
    pslldq  xmm4, 4
    pxor    xmm1, xmm4
    pslldq  xmm4, 4
    pxor    xmm1, xmm4
    pslldq  xmm4, 4
    pxor    xmm1, xmm4
    pxor    xmm1, xmm2
    movdqu  [16*2 + KS], xmm1

    ; Odd round key: top dword of xmm1 broadcast without rotation, then
    ; aesenclast with the zero key so no constant is mixed in.
    pshufd  xmm2, xmm1, 0ffh
    aesenclast xmm2, xmm6
    movdqa  xmm4, xmm3
    pslldq  xmm4, 4
    pxor    xmm3, xmm4
    pslldq  xmm4, 4
    pxor    xmm3, xmm4
    pslldq  xmm4, 4
    pxor    xmm3, xmm4
    pxor    xmm3, xmm2
    movdqu  [16*3 + KS], xmm3

    lea     KS, [32 + KS]
    dec     ITR
    jnz     Lenc_256_ks_loop

    ; Final (15th) round key: one more even step.
    movdqa  xmm2, xmm3
    pshufb  xmm2, xmm5
    aesenclast xmm2, xmm0
    movdqa  xmm4, xmm1
    pslldq  xmm4, 4
    pxor    xmm1, xmm4
    pslldq  xmm4, 4
    pxor    xmm1, xmm4
    pslldq  xmm4, 4
    pxor    xmm1, xmm4
    pxor    xmm1, xmm2
    movdqu  [16*2 + KS], xmm1

    ret
intel_aes_encrypt_init_256 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; intel_aes_decrypt_init_256
;   Builds the decryption key schedule for a 32-byte key: generates the
;   encryption schedule, reverses the order of the 15 round keys, and runs
;   aesimc over round keys 1..13.
;   Stack arguments: [esp+4] = key pointer, [esp+8] = key-schedule pointer.
;   Clobbers: eax, ecx, edx, xmm0-xmm6, flags.
ALIGN 16
intel_aes_decrypt_init_256 PROC
    mov     KEY, [esp + 1*4 + 0*4]
    mov     KS,  [esp + 1*4 + 1*4]

    ; Pass (KEY, KS) on the stack; the pops afterwards also restore both
    ; registers, which the callee modifies.
    push    KS
    push    KEY

    call    intel_aes_encrypt_init_256

    pop     KEY
    pop     KS

    ; First and last round keys swap places unchanged.
    movdqu  xmm0, [0*16 + KS]
    movdqu  xmm1, [14*16 + KS]
    movdqu  [14*16 + KS], xmm0
    movdqu  [0*16 + KS], xmm1

    ; Keys i and 14-i swap places, each passed through aesimc.
    i = 1
    WHILE i LT 7
        movdqu  xmm0, [i*16 + KS]
        movdqu  xmm1, [(14-i)*16 + KS]

        aesimc  xmm0, xmm0
        aesimc  xmm1, xmm1

        movdqu  [(14-i)*16 + KS], xmm0
        movdqu  [i*16 + KS], xmm1

        i = i+1
        ENDM

    ; The middle key stays in place and only gets aesimc.
    movdqu  xmm0, [7*16 + KS]
    aesimc  xmm0, xmm0
    movdqu  [7*16 + KS], xmm0
    ret
intel_aes_decrypt_init_256 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; gen_aes_cbc_enc_func rnds
;   Expands to the complete body (prologue through ret) of an AES-CBC
;   encryption routine with rnds rounds.
;   Stack arguments, as dword slots above the return address:
;     0 = context pointer, 1 = output, 4 = input, 5 = input length in bytes
;     (slots 2 and 3 are not read)
;   Round keys start 44 bytes into the context; the 16-byte chaining value
;   sits 32 bytes before them (context offset 12) and is written back with
;   the last ciphertext block on exit.
;   One block is processed per pass; a trailing partial block is ignored.
;   Returns eax = 0.  Preserves edi; clobbers ecx, edx, xmm0-xmm7 and flags.
gen_aes_cbc_enc_func MACRO rnds

    LOCAL   loop1
    LOCAL   bail

    push    inputLen                        ; edi is restored before ret

    ; 2*4 skips the saved edi and the return address.
    mov     ctx,      [esp + 2*4 + 0*4]
    mov     output,   [esp + 2*4 + 1*4]
    mov     input,    [esp + 2*4 + 4*4]
    mov     inputLen, [esp + 2*4 + 5*4]

    lea     ctx, [44+ctx]                   ; ctx -> round key 0

    movdqu  xmm0, [-32+ctx]                 ; xmm0 = chaining value

    ; Keep round keys 0..4 resident in xmm2..xmm6 for the whole loop.
    movdqu  xmm2, [0*16 + ctx]
    movdqu  xmm3, [1*16 + ctx]
    movdqu  xmm4, [2*16 + ctx]
    movdqu  xmm5, [3*16 + ctx]
    movdqu  xmm6, [4*16 + ctx]

loop1:
    cmp     inputLen, 1*16
    jb      bail

    ; xmm0 = previous ciphertext XOR plaintext XOR round key 0.
    movdqu  xmm1, [input]
    pxor    xmm1, xmm2
    pxor    xmm0, xmm1

    aesenc  xmm0, xmm3
    aesenc  xmm0, xmm4
    aesenc  xmm0, xmm5
    aesenc  xmm0, xmm6

    ; Remaining round keys are fetched from memory each block.
    i = 5
    WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesenc  xmm0, xmm7
        i = i+1
        ENDM
    movdqu  xmm7, [rnds*16 + ctx]
    aesenclast xmm0, xmm7

    movdqu  [output], xmm0

    lea     input,  [1*16 + input]
    lea     output, [1*16 + output]
    sub     inputLen, 1*16
    jmp     loop1

bail:
    movdqu  [-32+ctx], xmm0                 ; save chaining value for the next call

    xor     eax, eax                        ; return 0
    pop     inputLen
    ret

ENDM
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; gen_aes_cbc_dec_func rnds
;   Expands to the complete body (prologue through ret) of an AES-CBC
;   decryption routine with rnds rounds.
;   Stack arguments, as dword slots above the return address:
;     0 = context pointer, 1 = output, 4 = input, 5 = input length in bytes
;     (slots 2 and 3 are not read)
;   Round keys start 44 bytes into the context; the 16-byte chaining value
;   sits 32 bytes before them (context offset 12) and is updated with the
;   last ciphertext block consumed.
;   Seven blocks are processed per pass while at least 7*16 bytes remain,
;   then one block per pass; a trailing partial block is ignored.
;   Returns eax = 0.  Preserves edi; clobbers ecx, edx, xmm0-xmm7 and flags.
gen_aes_cbc_dec_func MACRO rnds

    LOCAL   loop7
    LOCAL   loop1
    LOCAL   dec1
    LOCAL   bail

    push    inputLen                        ; edi is restored before ret

    ; 2*4 skips the saved edi and the return address.
    mov     ctx,      [esp + 2*4 + 0*4]
    mov     output,   [esp + 2*4 + 1*4]
    mov     input,    [esp + 2*4 + 4*4]
    mov     inputLen, [esp + 2*4 + 5*4]

    lea     ctx, [44+ctx]                   ; ctx -> round key 0

    ; ---- seven blocks per pass ----
loop7:
    cmp     inputLen, 7*16
    jb      dec1

    movdqu  xmm0, [0*16 + input]
    movdqu  xmm1, [1*16 + input]
    movdqu  xmm2, [2*16 + input]
    movdqu  xmm3, [3*16 + input]
    movdqu  xmm4, [4*16 + input]
    movdqu  xmm5, [5*16 + input]
    movdqu  xmm6, [6*16 + input]

    ; Round 0: XOR with the first round key.
    movdqu  xmm7, [0*16 + ctx]
    pxor    xmm0, xmm7
    pxor    xmm1, xmm7
    pxor    xmm2, xmm7
    pxor    xmm3, xmm7
    pxor    xmm4, xmm7
    pxor    xmm5, xmm7
    pxor    xmm6, xmm7

    i = 1
    WHILE i LT rnds
        aes_dec_rnd i
        i = i+1
        ENDM
    aes_dec_last_rnd rnds

    ; Un-chain: block 0 uses the saved chaining value, block k uses
    ; ciphertext block k-1.  All ciphertext is re-read before any output
    ; store below, so these reads never see freshly written output.
    movdqu  xmm7, [-32 + ctx]
    pxor    xmm0, xmm7
    movdqu  xmm7, [0*16 + input]
    pxor    xmm1, xmm7
    movdqu  xmm7, [1*16 + input]
    pxor    xmm2, xmm7
    movdqu  xmm7, [2*16 + input]
    pxor    xmm3, xmm7
    movdqu  xmm7, [3*16 + input]
    pxor    xmm4, xmm7
    movdqu  xmm7, [4*16 + input]
    pxor    xmm5, xmm7
    movdqu  xmm7, [5*16 + input]
    pxor    xmm6, xmm7
    movdqu  xmm7, [6*16 + input]            ; becomes the next chaining value

    movdqu  [0*16 + output], xmm0
    movdqu  [1*16 + output], xmm1
    movdqu  [2*16 + output], xmm2
    movdqu  [3*16 + output], xmm3
    movdqu  [4*16 + output], xmm4
    movdqu  [5*16 + output], xmm5
    movdqu  [6*16 + output], xmm6
    movdqu  [-32 + ctx], xmm7

    lea     input,  [7*16 + input]
    lea     output, [7*16 + output]
    sub     inputLen, 7*16
    jmp     loop7

    ; ---- one block per pass ----
dec1:

    movdqu  xmm3, [-32 + ctx]               ; xmm3 = chaining value

loop1:
    cmp     inputLen, 1*16
    jb      bail

    movdqu  xmm0, [input]
    movdqa  xmm4, xmm0                      ; keep the ciphertext for chaining
    movdqu  xmm7, [0*16 + ctx]
    pxor    xmm0, xmm7

    i = 1
    WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesdec  xmm0, xmm7
        i = i+1
        ENDM
    movdqu  xmm7, [rnds*16 + ctx]
    aesdeclast xmm0, xmm7
    pxor    xmm3, xmm0                      ; plaintext = decrypted XOR chaining value

    movdqu  [output], xmm3
    movdqa  xmm3, xmm4                      ; this ciphertext chains into the next block

    lea     input,  [1*16 + input]
    lea     output, [1*16 + output]
    sub     inputLen, 1*16
    jmp     loop1

bail:
    movdqu  [-32 + ctx], xmm3               ; save chaining value for the next call
    xor     eax, eax                        ; return 0
    pop     inputLen
    ret
ENDM
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-128 CBC encryption (10 rounds); see gen_aes_cbc_enc_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_encrypt_cbc_128 PROC
    gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-192 CBC encryption (12 rounds); see gen_aes_cbc_enc_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_encrypt_cbc_192 PROC
    gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-256 CBC encryption (14 rounds); see gen_aes_cbc_enc_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_encrypt_cbc_256 PROC
    gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-128 CBC decryption (10 rounds); see gen_aes_cbc_dec_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_decrypt_cbc_128 PROC
    gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-192 CBC decryption (12 rounds); see gen_aes_cbc_dec_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_decrypt_cbc_192 PROC
    gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-256 CBC decryption (14 rounds); see gen_aes_cbc_dec_func for the
; argument layout and register usage.
ALIGN 16
intel_aes_decrypt_cbc_256 PROC
    gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; Extra register roles used by the CTR generator below.
ctrCtx      textequ <esi>   ; CTR context pointer, then the running 32-bit counter
CTR         textequ <ebx>   ; scratch for the byte-swapped, pre-whitened counter
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; gen_aes_ctr_func rnds
;   Expands to the complete body (prologue through ret) of an AES-CTR
;   routine with rnds rounds.
;   Stack arguments, as dword slots above the return address:
;     0 = CTR context pointer, 1 = output, 4 = input, 5 = input length in
;     bytes (slots 2 and 3 are not read)
;   CTR context fields read here: dword at +4 = pointer to the cipher
;   context (round keys start 44 bytes into it), 16 bytes at +8 = counter
;   block.  The counter block is written back on exit.
;   A 16-byte-aligned area of seven slots is carved out on the stack.  Each
;   slot holds (counter block XOR round key 0) for one upcoming block, so
;   round 0 costs nothing in the main loop.
;   NOTE(review): only the last 32 bits of the counter block are
;   incremented (as a big-endian value); there is no carry into the
;   preceding bytes.
;   Seven blocks are processed per pass while at least 7*16 bytes remain,
;   then one block per pass; a trailing partial block is ignored.
;   The 7-block loop interleaves slot refresh with rounds 1..7, so it relies
;   on rnds being greater than 7.
;   Returns eax = 0.  Preserves edi, esi, ebx, ebp; clobbers ecx, edx,
;   xmm0-xmm7 and flags.
gen_aes_ctr_func MACRO rnds

    LOCAL   loop7
    LOCAL   loop1
    LOCAL   enc1
    LOCAL   bail

    push    inputLen
    push    ctrCtx
    push    CTR
    push    ebp

    ; 4*5 skips the four saved registers and the return address.
    mov     ctrCtx,   [esp + 4*5 + 0*4]
    mov     output,   [esp + 4*5 + 1*4]
    mov     input,    [esp + 4*5 + 4*4]
    mov     inputLen, [esp + 4*5 + 5*4]

    mov     ctx, [4+ctrCtx]                 ; cipher context pointer
    lea     ctx, [44+ctx]                   ; ctx -> round key 0

    ; Reserve seven 16-byte slots, aligned so movdqa can be used on them.
    mov     ebp, esp
    sub     esp, 7*16
    and     esp, -16

    movdqu  xmm0, [8+ctrCtx]                ; counter block
    mov     ctrCtx, [ctrCtx + 8 + 3*4]      ; its last dword ...
    bswap   ctrCtx                          ; ... as a native-order integer
    movdqu  xmm1, [ctx + 0*16]

    pxor    xmm0, xmm1                      ; pre-apply round key 0

    ; Fill all seven slots with the whitened block.
    i = 0
    WHILE i LT 7
        movdqa  [esp + i*16], xmm0
        i = i+1
        ENDM

    ; Patch the counter dword of slots 1..6 with counter+1 .. counter+6,
    ; each XORed with the matching dword of round key 0.
    i = 1
    WHILE i LT 7
        inc     ctrCtx
        mov     CTR, ctrCtx
        bswap   CTR
        xor     CTR, [ctx + 3*4]
        mov     [esp + i*16 + 3*4], CTR
        i = i+1
        ENDM

    ; ---- seven blocks per pass ----
loop7:
    cmp     inputLen, 7*16
    jb      loop1

    movdqu  xmm0, [0*16 + esp]
    movdqu  xmm1, [1*16 + esp]
    movdqu  xmm2, [2*16 + esp]
    movdqu  xmm3, [3*16 + esp]
    movdqu  xmm4, [4*16 + esp]
    movdqu  xmm5, [5*16 + esp]
    movdqu  xmm6, [6*16 + esp]

    ; Rounds 1..7, each followed by refreshing slot i-1 with the next
    ; counter value for the following pass.
    i = 1
    WHILE i LE 7
        aes_rnd i

        inc     ctrCtx
        mov     CTR, ctrCtx
        bswap   CTR
        xor     CTR, [ctx + 3*4]
        mov     [esp + (i-1)*16 + 3*4], CTR

        i = i+1
        ENDM
    ; Remaining rounds (i continues from 8).
    WHILE i LT rnds
        aes_rnd i
        i = i+1
        ENDM
    aes_last_rnd rnds

    ; XOR the keystream with the input.
    movdqu  xmm7, [0*16 + input]
    pxor    xmm0, xmm7
    movdqu  xmm7, [1*16 + input]
    pxor    xmm1, xmm7
    movdqu  xmm7, [2*16 + input]
    pxor    xmm2, xmm7
    movdqu  xmm7, [3*16 + input]
    pxor    xmm3, xmm7
    movdqu  xmm7, [4*16 + input]
    pxor    xmm4, xmm7
    movdqu  xmm7, [5*16 + input]
    pxor    xmm5, xmm7
    movdqu  xmm7, [6*16 + input]
    pxor    xmm6, xmm7

    movdqu  [0*16 + output], xmm0
    movdqu  [1*16 + output], xmm1
    movdqu  [2*16 + output], xmm2
    movdqu  [3*16 + output], xmm3
    movdqu  [4*16 + output], xmm4
    movdqu  [5*16 + output], xmm5
    movdqu  [6*16 + output], xmm6

    lea     input,  [7*16 + input]
    lea     output, [7*16 + output]
    sub     inputLen, 7*16
    jmp     loop7

    ; ---- one block per pass (fewer than seven blocks remain here) ----
loop1:
    cmp     inputLen, 1*16
    jb      bail

    movdqu  xmm0, [esp]
    add     esp, 16                         ; consume one slot; esp is reloaded from ebp at exit

    i = 1
    WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesenc  xmm0, xmm7
        i = i+1
        ENDM
    movdqu  xmm7, [rnds*16 + ctx]
    aesenclast xmm0, xmm7

    movdqu  xmm7, [input]
    pxor    xmm0, xmm7
    movdqu  [output], xmm0

    lea     input,  [1*16 + input]
    lea     output, [1*16 + output]
    sub     inputLen, 1*16
    jmp     loop1

bail:

    ; [esp] is the first unused slot.  XOR round key 0 back out to recover
    ; the plain counter block and store it in the CTR context.
    mov     ctrCtx, [ebp + 4*5 + 0*4]
    movdqu  xmm0, [esp]
    movdqu  xmm1, [ctx + 0*16]
    pxor    xmm0, xmm1
    movdqu  [8+ctrCtx], xmm0

    xor     eax, eax                        ; return 0
    mov     esp, ebp
    pop     ebp
    pop     CTR
    pop     ctrCtx
    pop     inputLen
    ret
ENDM
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-128 CTR (10 rounds); see gen_aes_ctr_func for the argument layout
; and register usage.
ALIGN 16
intel_aes_encrypt_ctr_128 PROC
    gen_aes_ctr_func 10
intel_aes_encrypt_ctr_128 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-192 CTR (12 rounds); see gen_aes_ctr_func for the argument layout
; and register usage.
ALIGN 16
intel_aes_encrypt_ctr_192 PROC
    gen_aes_ctr_func 12
intel_aes_encrypt_ctr_192 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
; AES-256 CTR (14 rounds); see gen_aes_ctr_func for the argument layout
; and register usage.
ALIGN 16
intel_aes_encrypt_ctr_256 PROC
    gen_aes_ctr_func 14
intel_aes_encrypt_ctr_256 ENDP
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
END
|