; LICENSE:
; This submission to NSS is to be made available under the terms of the
; Mozilla Public License, v. 2.0. You can obtain one at http:
; //mozilla.org/MPL/2.0/.
;###############################################################################
; Copyright(c) 2014, Intel Corp.
; Developers and authors:
; Shay Gueron and Vlad Krasnov
; Intel Corporation, Israel Development Centre, Haifa, Israel
; Please send feedback directly to crypto.feedback.alias@intel.com

; AES-NI accelerated AES for the Microsoft x64 ABI (MASM syntax).
; Provides ECB/CBC/CTR bulk encryption/decryption and key-schedule setup.
; The context pointer handed in by the caller points 48 bytes before the
; round-key array, hence the recurring "lea ctx, [48+ctx]"; the CBC IV is
; kept at [-32 + ctx] and the CTR counter block at [16 + ctrCtx].
;
; NOTE(review): the <register> operands of every textequ below were missing
; from the received copy (angle-bracket text literals stripped); they have
; been restored from the Win64 argument order and from how each alias is
; used (e.g. bswap CTRSave requires a GPR; ctrCtx must differ from
; ctx/output/input/inputLen) — confirm against upstream NSS.

.DATA
ALIGN 16
; pshufb masks selecting the word to feed to aesenclast during key expansion,
; and the round-constant seeds (Lcon2 = 1bh for the last AES-128 rounds).
Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
Lcon1 dd 1,1,1,1
Lcon2 dd 1bh,1bh,1bh,1bh

.CODE

; Register aliases for the bulk-cipher entry points.
; Win64: arg1=rcx, arg2=rdx, arg3=r8, arg4=r9; args 5/6 come from the stack,
; so input/inputLen are reloaded and may reuse the arg3/arg4 registers.
ctx      textequ <rcx>          ; cipher context (round keys at [48 + ctx])
output   textequ <rdx>          ; destination buffer
input    textequ <r8>           ; source buffer (5th arg, reloaded from stack)
inputLen textequ <r9d>          ; byte count (6th arg, 32-bit, reloaded)

; One AES encryption round applied to 8 blocks held in xmm0-xmm7,
; with round key i from the schedule.  Clobbers xmm8.
aes_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesenc  xmm0, xmm8
        aesenc  xmm1, xmm8
        aesenc  xmm2, xmm8
        aesenc  xmm3, xmm8
        aesenc  xmm4, xmm8
        aesenc  xmm5, xmm8
        aesenc  xmm6, xmm8
        aesenc  xmm7, xmm8
ENDM

; Final AES encryption round for 8 blocks in xmm0-xmm7.  Clobbers xmm8.
aes_last_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesenclast xmm0, xmm8
        aesenclast xmm1, xmm8
        aesenclast xmm2, xmm8
        aesenclast xmm3, xmm8
        aesenclast xmm4, xmm8
        aesenclast xmm5, xmm8
        aesenclast xmm6, xmm8
        aesenclast xmm7, xmm8
ENDM

; One AES decryption round for 8 blocks in xmm0-xmm7.  Clobbers xmm8.
aes_dec_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesdec  xmm0, xmm8
        aesdec  xmm1, xmm8
        aesdec  xmm2, xmm8
        aesdec  xmm3, xmm8
        aesdec  xmm4, xmm8
        aesdec  xmm5, xmm8
        aesdec  xmm6, xmm8
        aesdec  xmm7, xmm8
ENDM

; Final AES decryption round for 8 blocks in xmm0-xmm7.  Clobbers xmm8.
aes_dec_last_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesdeclast xmm0, xmm8
        aesdeclast xmm1, xmm8
        aesdeclast xmm2, xmm8
        aesdeclast xmm3, xmm8
        aesdeclast xmm4, xmm8
        aesdeclast xmm5, xmm8
        aesdeclast xmm6, xmm8
        aesdeclast xmm7, xmm8
ENDM

; Emits the body of an ECB encrypt (enc=1) or decrypt (enc=0) function with
; `rnds` AES rounds.  Processes 8 blocks at a time, then a 1-block tail loop;
; any final partial block (< 16 bytes) is ignored.  Saves/restores the
; callee-saved xmm6-xmm8 (Win64).  Returns 0 in rax.
gen_aes_ecb_func MACRO enc, rnds

LOCAL   loop8
LOCAL   loop1
LOCAL   bail

        xor     inputLen, inputLen              ; belt-and-braces clear before the 32-bit stack load
        mov     input,    [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16                       ; spill xmm6-8 (callee-saved on Win64)
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]                   ; skip context header; round keys start here

; Bind the per-direction round macros / instructions once per expansion.
IF enc eq 1
        rnd         textequ <aes_rnd>
        lastrnd     textequ <aes_last_rnd>
        aesinst     textequ <aesenc>
        aeslastinst textequ <aesenclast>
ELSE
        rnd         textequ <aes_dec_rnd>
        lastrnd     textequ <aes_dec_last_rnd>
        aesinst     textequ <aesdec>
        aeslastinst textequ <aesdeclast>
ENDIF

loop8:
        cmp     inputLen, 8*16
        jb      loop1

        movdqu  xmm0, [0*16 + input]
        movdqu  xmm1, [1*16 + input]
        movdqu  xmm2, [2*16 + input]
        movdqu  xmm3, [3*16 + input]
        movdqu  xmm4, [4*16 + input]
        movdqu  xmm5, [5*16 + input]
        movdqu  xmm6, [6*16 + input]
        movdqu  xmm7, [7*16 + input]

        movdqu  xmm8, [0*16 + ctx]              ; whitening key
        pxor    xmm0, xmm8
        pxor    xmm1, xmm8
        pxor    xmm2, xmm8
        pxor    xmm3, xmm8
        pxor    xmm4, xmm8
        pxor    xmm5, xmm8
        pxor    xmm6, xmm8
        pxor    xmm7, xmm8

        i = 1
        WHILE i LT rnds
            rnd i
            i = i+1
            ENDM
        lastrnd rnds

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [7*16 + output], xmm7

        lea     input, [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     loop8

loop1:                                          ; one block at a time for the tail
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm0, [input]
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7

        i = 1
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesinst xmm0, xmm7
            i = i+1
            ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aeslastinst xmm0, xmm7

        movdqu  [output], xmm0

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        xor     rax, rax                        ; SECSuccess

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM

intel_aes_encrypt_ecb_128 PROC
        gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP

intel_aes_encrypt_ecb_192 PROC
        gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP

intel_aes_encrypt_ecb_256 PROC
        gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP

intel_aes_decrypt_ecb_128 PROC
        gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP

intel_aes_decrypt_ecb_192 PROC
        gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP

intel_aes_decrypt_ecb_256 PROC
        gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP

; Register aliases for the key-expansion entry points:
; (key, schedule) = (rcx, rdx); r8 doubles as loop counter and table pointer.
KEY textequ <rcx>
KS  textequ <rdx>
ITR textequ <r8>

; Expand a 128-bit key into the 11-round-key encryption schedule at KS.
intel_aes_encrypt_init_128 PROC
        movdqu  xmm1, [KEY]
        movdqu  [KS], xmm1                      ; round key 0 = raw key
        movdqa  xmm2, xmm1

        lea     ITR, Lcon1
        movdqa  xmm0, [ITR]                     ; running round constant
        lea     ITR, Lmask
        movdqa  xmm4, [ITR]                     ; rotword/subword shuffle mask

        mov     ITR, 8                          ; rounds 1..8 use rcon 1..80h

Lenc_128_ks_loop:
        lea     KS, [16 + KS]
        dec     ITR
        ; NOTE(review): the SSE instructions below leave RFLAGS untouched,
        ; so the jne at the bottom still tests this dec.

        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0                   ; SubWord(RotWord) + rcon
        pslld   xmm0, 1                         ; next round constant
        movdqa  xmm3, xmm1
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [KS], xmm1
        movdqa  xmm2, xmm1
        jne     Lenc_128_ks_loop

        lea     ITR, Lcon2                      ; rcon 1bh for round 9 (36h after pslld)
        movdqa  xmm0, [ITR]

        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld   xmm0, 1
        movdqa  xmm3, xmm1
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [16 + KS], xmm1
        movdqa  xmm2, xmm1

        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        movdqa  xmm3, xmm1
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [32 + KS], xmm1                 ; round key 10
        movdqa  xmm2, xmm1
        ret
intel_aes_encrypt_init_128 ENDP

; Build the AES-128 decryption schedule: expand the encryption schedule,
; reverse the round-key order, and apply InvMixColumns (aesimc) to the
; inner keys so aesdec can be used directly (Equivalent Inverse Cipher).
intel_aes_decrypt_init_128 PROC
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_128
        pop     KEY
        pop     KS

        movdqu  xmm0, [0*16 + KS]               ; swap first/last keys as-is
        movdqu  xmm1, [10*16 + KS]
        movdqu  [10*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        i = 1
        WHILE i LT 5
            movdqu  xmm0, [i*16 + KS]
            movdqu  xmm1, [(10-i)*16 + KS]
            aesimc  xmm0, xmm0
            aesimc  xmm1, xmm1
            movdqu  [(10-i)*16 + KS], xmm0
            movdqu  [i*16 + KS], xmm1
            i = i+1
            ENDM

        movdqu  xmm0, [5*16 + KS]               ; middle key: aesimc in place
        aesimc  xmm0, xmm0
        movdqu  [5*16 + KS], xmm0
        ret
intel_aes_decrypt_init_128 ENDP

; Expand a 192-bit key into the 13-round-key encryption schedule at KS.
; Each loop iteration produces two generations (3 xmm words each), laying
; down keys at 16/32/48-byte offsets; xmm5 carries the half-key between
; iterations.  Saves xmm6/xmm7 (callee-saved on Win64).
intel_aes_encrypt_init_192 PROC
        sub     rsp, 16*2
        movdqu  [16*0 + rsp], xmm6
        movdqu  [16*1 + rsp], xmm7

        movdqu  xmm1, [KEY]                     ; low 128 bits of key
        mov     ITR, [16 + KEY]                 ; high 64 bits of key
        movd    xmm3, ITR

        movdqu  [KS], xmm1
        movdqa  xmm5, xmm3

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]
        lea     ITR, Lmask192
        movdqu  xmm4, [ITR]

        mov     ITR, 4

Lenc_192_ks_loop:
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld   xmm0, 1

        movdqa  xmm6, xmm1
        movdqa  xmm7, xmm3
        pslldq  xmm6, 4
        pslldq  xmm7, 4
        pxor    xmm1, xmm6
        pxor    xmm3, xmm7
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pxor    xmm1, xmm2
        pshufd  xmm2, xmm1, 0ffh
        pxor    xmm3, xmm2
        movdqa  xmm6, xmm1
        shufpd  xmm5, xmm1, 00h                 ; stitch 64-bit halves into 128-bit keys
        shufpd  xmm6, xmm3, 01h
        movdqu  [16 + KS], xmm5
        movdqu  [32 + KS], xmm6

        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld   xmm0, 1

        movdqa  xmm6, xmm1
        movdqa  xmm7, xmm3
        pslldq  xmm6, 4
        pslldq  xmm7, 4
        pxor    xmm1, xmm6
        pxor    xmm3, xmm7
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pxor    xmm1, xmm2
        pshufd  xmm2, xmm1, 0ffh
        pxor    xmm3, xmm2
        movdqu  [48 + KS], xmm1
        movdqa  xmm5, xmm3

        lea     KS, [48 + KS]
        dec     ITR
        jnz     Lenc_192_ks_loop

        movdqu  [16 + KS], xmm5                 ; final partial generation

        movdqu  xmm7, [16*1 + rsp]
        movdqu  xmm6, [16*0 + rsp]
        add     rsp, 16*2
        ret
intel_aes_encrypt_init_192 ENDP

; AES-192 decryption schedule: see intel_aes_decrypt_init_128; 12 rounds.
intel_aes_decrypt_init_192 PROC
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_192
        pop     KEY
        pop     KS

        movdqu  xmm0, [0*16 + KS]
        movdqu  xmm1, [12*16 + KS]
        movdqu  [12*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        i = 1
        WHILE i LT 6
            movdqu  xmm0, [i*16 + KS]
            movdqu  xmm1, [(12-i)*16 + KS]
            aesimc  xmm0, xmm0
            aesimc  xmm1, xmm1
            movdqu  [(12-i)*16 + KS], xmm0
            movdqu  [i*16 + KS], xmm1
            i = i+1
            ENDM

        movdqu  xmm0, [6*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [6*16 + KS], xmm0
        ret
intel_aes_decrypt_init_192 ENDP

; Expand a 256-bit key into the 15-round-key encryption schedule at KS.
; Each loop iteration emits two keys: one via the rcon path, one via the
; rcon-less pshufd/aesenclast-with-zero path.  Saves xmm6/xmm7.
intel_aes_encrypt_init_256 PROC
        sub     rsp, 16*2
        movdqu  [16*0 + rsp], xmm6
        movdqu  [16*1 + rsp], xmm7

        movdqu  xmm1, [16*0 + KEY]
        movdqu  xmm3, [16*1 + KEY]
        movdqu  [16*0 + KS], xmm1
        movdqu  [16*1 + KS], xmm3

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]
        lea     ITR, Lmask256
        movdqu  xmm5, [ITR]

        pxor    xmm6, xmm6                      ; zero key for the rcon-less step

        mov     ITR, 6

Lenc_256_ks_loop:
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm5
        aesenclast xmm2, xmm0
        pslld   xmm0, 1
        movdqa  xmm4, xmm1
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pxor    xmm1, xmm2
        movdqu  [16*2 + KS], xmm1

        pshufd  xmm2, xmm1, 0ffh
        aesenclast xmm2, xmm6                   ; SubWord only (no rotate, no rcon)
        movdqa  xmm4, xmm3
        pslldq  xmm4, 4
        pxor    xmm3, xmm4
        pslldq  xmm4, 4
        pxor    xmm3, xmm4
        pslldq  xmm4, 4
        pxor    xmm3, xmm4
        pxor    xmm3, xmm2
        movdqu  [16*3 + KS], xmm3

        lea     KS, [32 + KS]
        dec     ITR
        jnz     Lenc_256_ks_loop

        movdqa  xmm2, xmm3                      ; final (15th) round key
        pshufb  xmm2, xmm5
        aesenclast xmm2, xmm0
        movdqa  xmm4, xmm1
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pxor    xmm1, xmm2
        movdqu  [16*2 + KS], xmm1

        movdqu  xmm7, [16*1 + rsp]
        movdqu  xmm6, [16*0 + rsp]
        add     rsp, 16*2
        ret
intel_aes_encrypt_init_256 ENDP

; AES-256 decryption schedule: see intel_aes_decrypt_init_128; 14 rounds.
intel_aes_decrypt_init_256 PROC
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_256
        pop     KEY
        pop     KS

        movdqu  xmm0, [0*16 + KS]
        movdqu  xmm1, [14*16 + KS]
        movdqu  [14*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        i = 1
        WHILE i LT 7
            movdqu  xmm0, [i*16 + KS]
            movdqu  xmm1, [(14-i)*16 + KS]
            aesimc  xmm0, xmm0
            aesimc  xmm1, xmm1
            movdqu  [(14-i)*16 + KS], xmm0
            movdqu  [i*16 + KS], xmm1
            i = i+1
            ENDM

        movdqu  xmm0, [7*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [7*16 + KS], xmm0
        ret
intel_aes_decrypt_init_256 ENDP

; Emits a CBC-encrypt body with `rnds` rounds.  CBC encryption is serial
; (each block chains on the previous ciphertext), so only one block is in
; flight; round keys 0-5 are cached in xmm2-xmm7.  The chaining value lives
; at [-32 + ctx] and is updated on exit.
gen_aes_cbc_enc_func MACRO rnds

LOCAL   loop1
LOCAL   bail

        mov     input,    [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]

        movdqu  xmm0, [-32+ctx]                 ; IV / chaining value
        movdqu  xmm2, [0*16 + ctx]              ; cache round keys 0-5
        movdqu  xmm3, [1*16 + ctx]
        movdqu  xmm4, [2*16 + ctx]
        movdqu  xmm5, [3*16 + ctx]
        movdqu  xmm6, [4*16 + ctx]
        movdqu  xmm7, [5*16 + ctx]

loop1:
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm1, [input]
        pxor    xmm1, xmm2                      ; plaintext ^ key0 ...
        pxor    xmm0, xmm1                      ; ... ^ chaining value

        aesenc  xmm0, xmm3
        aesenc  xmm0, xmm4
        aesenc  xmm0, xmm5
        aesenc  xmm0, xmm6
        aesenc  xmm0, xmm7

        i = 6
        WHILE i LT rnds
            movdqu  xmm8, [i*16 + ctx]
            aesenc  xmm0, xmm8
            i = i+1
            ENDM
        movdqu  xmm8, [rnds*16 + ctx]
        aesenclast xmm0, xmm8

        movdqu  [output], xmm0

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        movdqu  [-32+ctx], xmm0                 ; persist chaining value

        xor     rax, rax
        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM

; Emits a CBC-decrypt body with `rnds` rounds.  Decryption parallelizes:
; 8 blocks are decrypted together, then xored with the IV / preceding
; ciphertext blocks; a 1-block loop handles the tail.  The IV slot at
; [-32 + ctx] is updated with the last ciphertext block processed.
gen_aes_cbc_dec_func MACRO rnds

LOCAL   loop8
LOCAL   loop1
LOCAL   dec1
LOCAL   bail

        mov     input,    [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]

loop8:
        cmp     inputLen, 8*16
        jb      dec1

        movdqu  xmm0, [0*16 + input]
        movdqu  xmm1, [1*16 + input]
        movdqu  xmm2, [2*16 + input]
        movdqu  xmm3, [3*16 + input]
        movdqu  xmm4, [4*16 + input]
        movdqu  xmm5, [5*16 + input]
        movdqu  xmm6, [6*16 + input]
        movdqu  xmm7, [7*16 + input]

        movdqu  xmm8, [0*16 + ctx]
        pxor    xmm0, xmm8
        pxor    xmm1, xmm8
        pxor    xmm2, xmm8
        pxor    xmm3, xmm8
        pxor    xmm4, xmm8
        pxor    xmm5, xmm8
        pxor    xmm6, xmm8
        pxor    xmm7, xmm8

        i = 1
        WHILE i LT rnds
            aes_dec_rnd i
            i = i+1
            ENDM
        aes_dec_last_rnd rnds

        movdqu  xmm8, [-32 + ctx]               ; xor with IV resp. previous ciphertext
        pxor    xmm0, xmm8
        movdqu  xmm8, [0*16 + input]
        pxor    xmm1, xmm8
        movdqu  xmm8, [1*16 + input]
        pxor    xmm2, xmm8
        movdqu  xmm8, [2*16 + input]
        pxor    xmm3, xmm8
        movdqu  xmm8, [3*16 + input]
        pxor    xmm4, xmm8
        movdqu  xmm8, [4*16 + input]
        pxor    xmm5, xmm8
        movdqu  xmm8, [5*16 + input]
        pxor    xmm6, xmm8
        movdqu  xmm8, [6*16 + input]
        pxor    xmm7, xmm8
        movdqu  xmm8, [7*16 + input]            ; becomes next IV

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [7*16 + output], xmm7
        movdqu  [-32 + ctx], xmm8

        lea     input, [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     loop8

dec1:
        movdqu  xmm3, [-32 + ctx]               ; current chaining value

loop1:
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm0, [input]
        movdqa  xmm4, xmm0                      ; keep ciphertext as next IV
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7

        i = 1
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesdec  xmm0, xmm7
            i = i+1
            ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesdeclast xmm0, xmm7

        pxor    xmm3, xmm0
        movdqu  [output], xmm3
        movdqa  xmm3, xmm4

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        movdqu  [-32 + ctx], xmm3               ; persist chaining value

        xor     rax, rax
        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM

intel_aes_encrypt_cbc_128 PROC
        gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP

intel_aes_encrypt_cbc_192 PROC
        gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP

intel_aes_encrypt_cbc_256 PROC
        gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP

intel_aes_decrypt_cbc_128 PROC
        gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP

intel_aes_decrypt_cbc_192 PROC
        gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP

intel_aes_decrypt_cbc_256 PROC
        gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP

; Register aliases for the CTR entry points.  ctrCtx must be distinct from
; ctx/output/input/inputLen (the inner AES context pointer is fetched from
; [8 + ctrCtx] into ctx); CTRSave holds the big-endian-corrected counter.
ctrCtx  textequ <r10>
CTR     textequ <r11d>
CTRSave textequ <eax>

; Emits a CTR-mode encrypt body with `rnds` rounds.  A ring of 8
; pre-whitened counter blocks (counter ^ round key 0) is kept in a 16-byte
; aligned stack buffer; the 8-block loop refreshes each slot's counter word
; while the corresponding AES round is in flight.  The 1-block tail loop
; consumes slots with "add rsp, 16", so `bail` finds the next unused
; counter block at [rsp] and writes it back (un-whitened) to the context.
gen_aes_ctr_func MACRO rnds

LOCAL   loop8
LOCAL   loop1
LOCAL   enc1
LOCAL   bail

        mov     input,    [rsp + 8*1 + 4*8]
        mov     inputLen, [rsp + 8*1 + 5*8]

        mov     ctrCtx, ctx
        mov     ctx, [8+ctrCtx]                 ; inner AES context
        lea     ctx, [48+ctx]

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        push    rbp                             ; frame pointer: rsp gets realigned below
        mov     rbp, rsp
        sub     rsp, 8*16
        and     rsp, -16                        ; movdqa needs 16-byte alignment

        movdqu  xmm0, [16+ctrCtx]               ; counter block
        mov     CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]
        bswap   CTRSave                         ; counter word, host byte order
        movdqu  xmm1, [ctx + 0*16]
        pxor    xmm0, xmm1                      ; pre-whiten with round key 0

        movdqa  [rsp + 0*16], xmm0
        movdqa  [rsp + 1*16], xmm0
        movdqa  [rsp + 2*16], xmm0
        movdqa  [rsp + 3*16], xmm0
        movdqa  [rsp + 4*16], xmm0
        movdqa  [rsp + 5*16], xmm0
        movdqa  [rsp + 6*16], xmm0
        movdqa  [rsp + 7*16], xmm0

        ; Patch slots 1-7 with incremented counters (big-endian, re-whitened
        ; by xoring with the counter word of round key 0).
        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 1*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 2*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 3*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 4*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 5*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 6*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 7*16 + 3*4], CTR

loop8:
        cmp     inputLen, 8*16
        jb      loop1

        movdqu  xmm0, [0*16 + rsp]              ; 8 pre-whitened counter blocks
        movdqu  xmm1, [1*16 + rsp]
        movdqu  xmm2, [2*16 + rsp]
        movdqu  xmm3, [3*16 + rsp]
        movdqu  xmm4, [4*16 + rsp]
        movdqu  xmm5, [5*16 + rsp]
        movdqu  xmm6, [6*16 + rsp]
        movdqu  xmm7, [7*16 + rsp]

        ; Interleave the first 8 rounds with refreshing the 8 stack slots
        ; for the next iteration.
        i = 1
        WHILE i LE 8
            aes_rnd i
            inc     CTRSave
            mov     CTR, CTRSave
            bswap   CTR
            xor     CTR, DWORD PTR [ctx + 3*4]
            mov     DWORD PTR [rsp + (i-1)*16 + 3*4], CTR
            i = i+1
            ENDM
        WHILE i LT rnds
            aes_rnd i
            i = i+1
            ENDM
        aes_last_rnd rnds

        movdqu  xmm8, [0*16 + input]
        pxor    xmm0, xmm8
        movdqu  xmm8, [1*16 + input]
        pxor    xmm1, xmm8
        movdqu  xmm8, [2*16 + input]
        pxor    xmm2, xmm8
        movdqu  xmm8, [3*16 + input]
        pxor    xmm3, xmm8
        movdqu  xmm8, [4*16 + input]
        pxor    xmm4, xmm8
        movdqu  xmm8, [5*16 + input]
        pxor    xmm5, xmm8
        movdqu  xmm8, [6*16 + input]
        pxor    xmm6, xmm8
        movdqu  xmm8, [7*16 + input]
        pxor    xmm7, xmm8

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [7*16 + output], xmm7

        lea     input, [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     loop8

loop1:
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm0, [rsp]                     ; consume one pre-whitened counter slot
        add     rsp, 16

        i = 1
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesenc  xmm0, xmm7
            i = i+1
            ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu  xmm7, [input]
        pxor    xmm0, xmm7
        movdqu  [output], xmm0

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        movdqu  xmm0, [rsp]                     ; next unused counter block
        movdqu  xmm1, [ctx + 0*16]
        pxor    xmm0, xmm1                      ; undo the key-0 whitening
        movdqu  [16+ctrCtx], xmm0               ; persist counter for next call

        xor     rax, rax
        mov     rsp, rbp                        ; discard counter buffer
        pop     rbp

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM

intel_aes_encrypt_ctr_128 PROC
        gen_aes_ctr_func 10
intel_aes_encrypt_ctr_128 ENDP

intel_aes_encrypt_ctr_192 PROC
        gen_aes_ctr_func 12
intel_aes_encrypt_ctr_192 ENDP

intel_aes_encrypt_ctr_256 PROC
        gen_aes_ctr_func 14
intel_aes_encrypt_ctr_256 ENDP

END